Merge branch 'next' into 2025-MAR-ALPHA-1

CHANGELOG.md (26 changes)

@@ -5,6 +5,32 @@ All notable changes to Crawl4AI will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Version 0.5.0 (2025-03-02)
+
+### Added
+
+- *(profiles)* Add BrowserProfiler class for dedicated browser profile management
+- *(cli)* Add interactive profile management to CLI with rich UI
+- *(profiles)* Add ability to crawl directly from profile management interface
+- *(browser)* Support identity-based browsing with persistent profiles
+- *(deep-crawling)* Add max_pages parameter to limit the number of pages crawled in all deep crawling strategies
+- *(deep-crawling)* Add score_threshold parameter to BFS and DFS strategies to filter URLs by score
+
+### Changed
+
+- *(browser)* Refactor profile management from ManagedBrowser to BrowserProfiler class
+- *(cli)* Enhance CLI with profile selection and status display for crawling
+- *(examples)* Update identity-based browsing example to use BrowserProfiler class
+- *(docs)* Update identity-based crawling documentation
+- *(docs)* Update deep crawling documentation with max_pages and score_threshold parameters
+- *(examples)* Add example demonstrating the use of max_pages and score_threshold parameters
+
+### Fixed
+
+- *(browser)* Fix profile detection and management on different platforms
+- *(cli)* Fix CLI command structure for better user experience
+- *(deep-crawling)* Improve BFS and DFS strategies to handle page count limits more efficiently
+
 ## Version 0.5.0 (2025-02-21)
 

README.md (66 changes)

@@ -21,9 +21,9 @@
 Crawl4AI is the #1 trending GitHub repository, actively maintained by a vibrant community. It delivers blazing-fast, AI-ready web crawling tailored for LLMs, AI agents, and data pipelines. Open source, flexible, and built for real-time performance, Crawl4AI empowers developers with unmatched speed, precision, and deployment ease.
 
-[✨ Check out latest update v0.4.3bx](#-recent-updates)
+[✨ Check out latest update v0.5.0](#-recent-updates)
 
-🎉 **Version 0.4.3bx is out!** This release brings exciting new features like a Memory Dispatcher System, Streaming Support, LLM-Powered Markdown Generation, Schema Generation, and Robots.txt Compliance! [Read the release notes →](https://docs.crawl4ai.com/blog)
+🎉 **Version 0.5.0 is out!** This major release introduces Deep Crawling with BFS/DFS/BestFirst strategies, Memory-Adaptive Dispatcher, Multiple Crawling Strategies (Playwright and HTTP), Docker Deployment with FastAPI, Command-Line Interface (CLI), and more! [Read the release notes →](https://docs.crawl4ai.com/blog)
 
 <details>
 <summary>🤓 <strong>My Personal Story</strong></summary>
@@ -68,7 +68,7 @@ If you encounter any browser-related issues, you can install them manually:
 python -m playwright install --with-deps chromium
 ```
 
-2. Run a simple web crawl:
+2. Run a simple web crawl with Python:
 ```python
 import asyncio
 from crawl4ai import *
@@ -84,6 +84,18 @@ if __name__ == "__main__":
 asyncio.run(main())
 ```
 
+3. Or use the new command-line interface:
+```bash
+# Basic crawl with markdown output
+crwl https://www.nbcnews.com/business -o markdown
+
+# Deep crawl with BFS strategy, max 10 pages
+crwl https://docs.crawl4ai.com --deep-crawl bfs --max-pages 10
+
+# Use LLM extraction with a specific question
+crwl https://www.example.com/products -q "Extract all product prices"
+```
+
 ## ✨ Features
 
 <details>
@@ -112,6 +124,7 @@ if __name__ == "__main__":
 
 - 🖥️ **Managed Browser**: Use user-owned browsers with full control, avoiding bot detection.
 - 🔄 **Remote Browser Control**: Connect to Chrome Developer Tools Protocol for remote, large-scale data extraction.
+- 👤 **Browser Profiler**: Create and manage persistent profiles with saved authentication states, cookies, and settings.
 - 🔒 **Session Management**: Preserve browser states and reuse them for multi-step crawling.
 - 🧩 **Proxy Support**: Seamlessly connect to proxies with authentication for secure access.
 - ⚙️ **Full Browser Control**: Modify headers, cookies, user agents, and more for tailored crawling setups.
@@ -140,10 +153,11 @@ if __name__ == "__main__":
 <details>
 <summary>🚀 <strong>Deployment</strong></summary>
 
-- 🐳 **Dockerized Setup**: Optimized Docker image with API server for easy deployment.
+- 🐳 **Dockerized Setup**: Optimized Docker image with FastAPI server for easy deployment.
+- 🔑 **Secure Authentication**: Built-in JWT token authentication for API security.
 - 🔄 **API Gateway**: One-click deployment with secure token authentication for API-based workflows.
 - 🌐 **Scalable Architecture**: Designed for mass-scale production and optimized server performance.
-- ⚙️ **DigitalOcean Deployment**: Ready-to-deploy configurations for DigitalOcean and similar platforms.
+- ☁️ **Cloud Deployment**: Ready-to-deploy configurations for major cloud platforms.
 
 </details>
 
@@ -406,7 +420,7 @@ if __name__ == "__main__":
 ```python
 import os
 import asyncio
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LlmConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
 from crawl4ai.extraction_strategy import LLMExtractionStrategy
 from pydantic import BaseModel, Field
 
@@ -422,7 +436,7 @@ async def main():
 extraction_strategy=LLMExtractionStrategy(
     # Here you can use any provider that Litellm library supports, for instance: ollama/qwen2
     # provider="ollama/qwen2", api_token="no-token",
-    llmConfig = LlmConfig(provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY')),
+    llm_config = LLMConfig(provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY')),
     schema=OpenAIModelFee.schema(),
     extraction_type="schema",
     instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.
@@ -486,21 +500,31 @@ async def test_news_crawl():
 
 ## ✨ Recent Updates
 
-- **🚀 New Dispatcher System**: Scale to thousands of URLs with intelligent **memory monitoring**, **concurrency control**, and optional **rate limiting**. (See `MemoryAdaptiveDispatcher`, `SemaphoreDispatcher`, `RateLimiter`, `CrawlerMonitor`)
-- **⚡ Streaming Mode**: Process results **as they arrive** instead of waiting for an entire batch to complete. (Set `stream=True` in `CrawlerRunConfig`)
-- **🤖 Enhanced LLM Integration**:
-  - **Automatic schema generation**: Create extraction rules from HTML using OpenAI or Ollama, no manual CSS/XPath needed.
-  - **LLM-powered Markdown filtering**: Refine your markdown output with a new `LLMContentFilter` that understands content relevance.
-  - **Ollama Support**: Use open-source or self-hosted models for private or cost-effective extraction.
-- **🏎️ Faster Scraping Option**: New `LXMLWebScrapingStrategy` offers **10-20x speedup** for large, complex pages (experimental).
-- **🤖 robots.txt Compliance**: Respect website rules with `check_robots_txt=True` and efficient local caching.
-- **🔄 Proxy Rotation**: Built-in support for dynamic proxy switching and IP verification, with support for authenticated proxies and session persistence.
-- **➡️ URL Redirection Tracking**: The `redirected_url` field now captures the final destination after any redirects.
-- **🪞 Improved Mirroring**: The `LXMLWebScrapingStrategy` now has much greater fidelity, allowing for almost pixel-perfect mirroring of websites.
-- **📈 Enhanced Monitoring**: Track memory, CPU, and individual crawler status with `CrawlerMonitor`.
-- **📝 Improved Documentation**: More examples, clearer explanations, and updated tutorials.
-
-Read the full details in our [0.4.3bx Release Notes](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md).
+### Version 0.5.0 Major Release Highlights
+
+- **🚀 Deep Crawling System**: Explore websites beyond initial URLs with three strategies:
+  - **BFS Strategy**: Breadth-first search explores websites level by level
+  - **DFS Strategy**: Depth-first search explores each branch deeply before backtracking
+  - **BestFirst Strategy**: Uses scoring functions to prioritize which URLs to crawl next
+  - **Page Limiting**: Control the maximum number of pages to crawl with `max_pages` parameter
+  - **Score Thresholds**: Filter URLs based on relevance scores
+- **⚡ Memory-Adaptive Dispatcher**: Dynamically adjusts concurrency based on system memory with built-in rate limiting
+- **🔄 Multiple Crawling Strategies**:
+  - **AsyncPlaywrightCrawlerStrategy**: Browser-based crawling with JavaScript support (Default)
+  - **AsyncHTTPCrawlerStrategy**: Fast, lightweight HTTP-only crawler for simple tasks
+- **🐳 Docker Deployment**: Easy deployment with FastAPI server and streaming/non-streaming endpoints
+- **💻 Command-Line Interface**: New `crwl` CLI provides convenient terminal access to all features with intuitive commands and configuration options
+- **👤 Browser Profiler**: Create and manage persistent browser profiles to save authentication states, cookies, and settings for seamless crawling of protected content
+- **🧠 Crawl4AI Coding Assistant**: AI-powered coding assistant that answers your Crawl4AI questions and generates working crawling code
+- **🏎️ LXML Scraping Mode**: Fast HTML parsing using the `lxml` library for improved performance
+- **🌐 Proxy Rotation**: Built-in support for proxy switching with `RoundRobinProxyStrategy`
+- **🤖 LLM Content Filter**: Intelligent markdown generation using LLMs
+- **📄 PDF Processing**: Extract text, images, and metadata from PDF files
+- **🔗 URL Redirection Tracking**: Automatically follow and record HTTP redirects
+- **🤖 LLM Schema Generation**: Easily create extraction schemas with LLM assistance
+- **🔍 robots.txt Compliance**: Respect website crawling rules
+
+Read the full details in our [0.5.0 Release Notes](https://docs.crawl4ai.com/blog/releases/0.5.0.html) or check the [CHANGELOG](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md).
 
 ## Version Numbering in Crawl4AI
 
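The deep-crawling highlights above map onto the Python API roughly as follows. This is a hedged sketch: `max_pages` and `score_threshold` come from this release's changelog, `BFSDeepCrawlStrategy` and `CrawlerRunConfig` from the exported names in this diff, while the presence and placement of `max_depth` is an assumption.

```python
# Hedged sketch: wiring the new 0.5.0 deep-crawl knobs into a run config.
from crawl4ai import CrawlerRunConfig, BFSDeepCrawlStrategy

config = CrawlerRunConfig(
    deep_crawl_strategy=BFSDeepCrawlStrategy(
        max_depth=2,          # assumed: how many link levels to follow
        max_pages=10,         # new in 0.5.0: hard cap on total pages crawled
        score_threshold=0.4,  # new in 0.5.0: skip URLs scoring below this
    ),
)
```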

crawl4ai/__init__.py

@@ -2,7 +2,8 @@
 import warnings
 
 from .async_webcrawler import AsyncWebCrawler, CacheMode
-from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig
+from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig
+
 from .content_scraping_strategy import (
     ContentScrapingStrategy,
     WebScrapingStrategy,
@@ -22,6 +23,7 @@ from .extraction_strategy import (
     CosineStrategy,
     JsonCssExtractionStrategy,
     JsonXPathExtractionStrategy,
+    JsonLxmlExtractionStrategy
 )
 from .chunking_strategy import ChunkingStrategy, RegexChunking
 from .markdown_generation_strategy import DefaultMarkdownGenerator
@@ -42,12 +44,14 @@ from .async_dispatcher import (
 )
 from .docker_client import Crawl4aiDockerClient
 from .hub import CrawlerHub
+from .browser_profiler import BrowserProfiler
 from .deep_crawling import (
     DeepCrawlStrategy,
     BFSDeepCrawlStrategy,
     FilterChain,
-    ContentTypeFilter,
+    URLPatternFilter,
     DomainFilter,
+    ContentTypeFilter,
     URLFilter,
     FilterStats,
     SEOFilter,
@@ -66,11 +70,14 @@ __all__ = [
     "AsyncLoggerBase",
     "AsyncLogger",
     "AsyncWebCrawler",
+    "BrowserProfiler",
+    "LLMConfig",
     "DeepCrawlStrategy",
     "BFSDeepCrawlStrategy",
     "BestFirstCrawlingStrategy",
     "DFSDeepCrawlStrategy",
     "FilterChain",
+    "URLPatternFilter",
     "ContentTypeFilter",
     "DomainFilter",
     "FilterStats",
@@ -97,6 +104,7 @@ __all__ = [
     "CosineStrategy",
     "JsonCssExtractionStrategy",
     "JsonXPathExtractionStrategy",
+    "JsonLxmlExtractionStrategy",
     "ChunkingStrategy",
     "RegexChunking",
     "DefaultMarkdownGenerator",

crawl4ai/_version.py

@@ -1,2 +1,2 @@
 # crawl4ai/_version.py
-__version__ = "0.5.0"
+__version__ = "0.5.0.post4"
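The bump from `0.5.0` to `0.5.0.post4` is a PEP 440 post-release: the same feature set with packaging-level fixes on top, sorting after the base version. A small standalone parser (an illustration, not part of Crawl4AI; real tooling should use `packaging.version.Version`) shows why:

```python
# Parse "X.Y.Z" or "X.Y.Z.postN" into a sortable tuple.
# Illustrative only; handles just these two PEP 440 forms.
def parse_post_version(v: str) -> tuple:
    parts = v.split(".")
    post = 0
    if parts[-1].startswith("post"):
        post = int(parts[-1][len("post"):])
        parts = parts[:-1]
    return tuple(int(p) for p in parts) + (post,)

print(parse_post_version("0.5.0.post4"))  # (0, 5, 0, 4)
```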

crawl4ai/async_configs.py

@@ -11,19 +11,23 @@ from .config import (
 )
 
 from .user_agent_generator import UAGen, ValidUAGenerator  # , OnlineUAGenerator
-from .extraction_strategy import ExtractionStrategy
+from .extraction_strategy import ExtractionStrategy, LLMExtractionStrategy
 from .chunking_strategy import ChunkingStrategy, RegexChunking
 
 from .markdown_generation_strategy import MarkdownGenerationStrategy
 from .content_scraping_strategy import ContentScrapingStrategy, WebScrapingStrategy
 from .deep_crawling import DeepCrawlStrategy
-from typing import Union, List
 from .cache_context import CacheMode
 from .proxy_strategy import ProxyRotationStrategy
 
+from typing import Union, List
 import inspect
 from typing import Any, Dict, Optional
 from enum import Enum
 
+from .proxy_strategy import ProxyConfig
+
+
 def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
     """
@@ -178,7 +182,7 @@ class BrowserConfig:
         is "chromium". Default: "chromium".
     proxy (Optional[str]): Proxy server URL (e.g., "http://username:password@proxy:port"). If None, no proxy is used.
         Default: None.
-    proxy_config (dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
+    proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
         If None, no additional proxy config. Default: None.
     viewport_width (int): Default viewport width for pages. Default: 1080.
     viewport_height (int): Default viewport height for pages. Default: 600.
@@ -223,7 +227,7 @@ class BrowserConfig:
        chrome_channel: str = "chromium",
        channel: str = "chromium",
        proxy: str = None,
-       proxy_config: dict = None,
+       proxy_config: Union[ProxyConfig, dict, None] = None,
        viewport_width: int = 1080,
        viewport_height: int = 600,
        viewport: dict = None,
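With the widened annotation above, `proxy_config` should accept either form. A hedged fragment: the dict keys follow the docstring's own example, but the `ProxyConfig` keyword names are an assumption, and the credentials are placeholders.

```python
# Hedged sketch: both styles should satisfy Union[ProxyConfig, dict, None].
from crawl4ai import BrowserConfig
from crawl4ai.proxy_strategy import ProxyConfig

dict_style = BrowserConfig(
    proxy_config={"server": "http://proxy:8080", "username": "user", "password": "pass"}
)
typed_style = BrowserConfig(
    proxy_config=ProxyConfig(server="http://proxy:8080", username="user", password="pass")
)
```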
@@ -313,7 +317,7 @@ class BrowserConfig:
            chrome_channel=kwargs.get("chrome_channel", "chromium"),
            channel=kwargs.get("channel", "chromium"),
            proxy=kwargs.get("proxy"),
-           proxy_config=kwargs.get("proxy_config"),
+           proxy_config=kwargs.get("proxy_config", None),
            viewport_width=kwargs.get("viewport_width", 1080),
            viewport_height=kwargs.get("viewport_height", 600),
            accept_downloads=kwargs.get("accept_downloads", False),
@@ -497,6 +501,15 @@ class CrawlerRunConfig():
         Default: False.
     css_selector (str or None): CSS selector to extract a specific portion of the page.
         Default: None.
 
+    target_elements (list of str or None): List of CSS selectors for specific elements for Markdown generation
+        and structured data extraction. When you set this, only the contents
+        of these elements are processed for extraction and Markdown generation.
+        If you do not set any value, the entire page is processed.
+        The difference from css_selector is that css_selector shrinks
+        the initial raw HTML to the selected element, while target_elements only affects
+        the extraction and Markdown generation.
+        Default: None
     excluded_tags (list of str or None): List of HTML tags to exclude from processing.
         Default: None.
     excluded_selector (str or None): CSS selector to exclude from processing.
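The distinction the new docstring draws can be seen in a short config sketch. The selector strings are hypothetical; only the two parameter names come from this diff.

```python
# Hedged sketch: target_elements vs. css_selector on CrawlerRunConfig.
from crawl4ai import CrawlerRunConfig

# Crops the raw HTML itself: everything downstream sees only this element.
crop_config = CrawlerRunConfig(css_selector="article.main")

# Keeps the full page (links, metadata intact) but scopes markdown
# generation and structured extraction to these elements only.
scoped_config = CrawlerRunConfig(target_elements=["article.main", "div.comments"])
```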
@@ -513,7 +526,7 @@ class CrawlerRunConfig():
         Default: "lxml".
     scraping_strategy (ContentScrapingStrategy): Scraping strategy to use.
         Default: WebScrapingStrategy.
-    proxy_config (dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
+    proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
         If None, no additional proxy config. Default: None.
 
     # SSL Parameters
@@ -593,6 +606,8 @@ class CrawlerRunConfig():
         Default: IMAGE_SCORE_THRESHOLD (e.g., 3).
     exclude_external_images (bool): If True, exclude all external images from processing.
         Default: False.
+    table_score_threshold (int): Minimum score threshold for processing a table.
+        Default: 7.
 
     # Link and Domain Handling Parameters
     exclude_social_media_domains (list of str): List of domains to exclude for social media links.
@@ -646,6 +661,7 @@ class CrawlerRunConfig():
        markdown_generator: MarkdownGenerationStrategy = None,
        only_text: bool = False,
        css_selector: str = None,
+       target_elements: List[str] = None,
        excluded_tags: list = None,
        excluded_selector: str = None,
        keep_data_attributes: bool = False,
@@ -654,7 +670,7 @@ class CrawlerRunConfig():
        prettiify: bool = False,
        parser_type: str = "lxml",
        scraping_strategy: ContentScrapingStrategy = None,
-       proxy_config: dict = None,
+       proxy_config: Union[ProxyConfig, dict, None] = None,
        proxy_rotation_strategy: Optional[ProxyRotationStrategy] = None,
        # SSL Parameters
        fetch_ssl_certificate: bool = False,
@@ -694,6 +710,7 @@ class CrawlerRunConfig():
        pdf: bool = False,
        image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
        image_score_threshold: int = IMAGE_SCORE_THRESHOLD,
+       table_score_threshold: int = 7,
        exclude_external_images: bool = False,
        # Link and Domain Handling Parameters
        exclude_social_media_domains: list = None,
@@ -725,6 +742,7 @@ class CrawlerRunConfig():
        self.markdown_generator = markdown_generator
        self.only_text = only_text
        self.css_selector = css_selector
+       self.target_elements = target_elements or []
        self.excluded_tags = excluded_tags or []
        self.excluded_selector = excluded_selector or ""
        self.keep_data_attributes = keep_data_attributes
@@ -779,6 +797,7 @@ class CrawlerRunConfig():
        self.image_description_min_word_threshold = image_description_min_word_threshold
        self.image_score_threshold = image_score_threshold
        self.exclude_external_images = exclude_external_images
+       self.table_score_threshold = table_score_threshold
 
        # Link and Domain Handling Parameters
        self.exclude_social_media_domains = (
@@ -854,6 +873,7 @@ class CrawlerRunConfig():
            markdown_generator=kwargs.get("markdown_generator"),
            only_text=kwargs.get("only_text", False),
            css_selector=kwargs.get("css_selector"),
+           target_elements=kwargs.get("target_elements", []),
            excluded_tags=kwargs.get("excluded_tags", []),
            excluded_selector=kwargs.get("excluded_selector", ""),
            keep_data_attributes=kwargs.get("keep_data_attributes", False),
@@ -909,6 +929,7 @@ class CrawlerRunConfig():
            image_score_threshold=kwargs.get(
                "image_score_threshold", IMAGE_SCORE_THRESHOLD
            ),
+           table_score_threshold=kwargs.get("table_score_threshold", 7),
            exclude_external_images=kwargs.get("exclude_external_images", False),
            # Link and Domain Handling Parameters
            exclude_social_media_domains=kwargs.get(
@@ -954,6 +975,7 @@ class CrawlerRunConfig():
            "markdown_generator": self.markdown_generator,
            "only_text": self.only_text,
            "css_selector": self.css_selector,
+           "target_elements": self.target_elements,
            "excluded_tags": self.excluded_tags,
            "excluded_selector": self.excluded_selector,
            "keep_data_attributes": self.keep_data_attributes,
@@ -997,6 +1019,7 @@ class CrawlerRunConfig():
            "pdf": self.pdf,
            "image_description_min_word_threshold": self.image_description_min_word_threshold,
            "image_score_threshold": self.image_score_threshold,
+           "table_score_threshold": self.table_score_threshold,
            "exclude_external_images": self.exclude_external_images,
            "exclude_social_media_domains": self.exclude_social_media_domains,
            "exclude_external_links": self.exclude_external_links,
@@ -1042,7 +1065,7 @@ class CrawlerRunConfig():
        return CrawlerRunConfig.from_kwargs(config_dict)
 
 
-class LlmConfig:
+class LLMConfig:
    def __init__(
        self,
        provider: str = DEFAULT_PROVIDER,
@@ -1063,8 +1086,8 @@ class LlmConfig:
 
 
    @staticmethod
-   def from_kwargs(kwargs: dict) -> "LlmConfig":
-       return LlmConfig(
+   def from_kwargs(kwargs: dict) -> "LLMConfig":
+       return LLMConfig(
            provider=kwargs.get("provider", DEFAULT_PROVIDER),
            api_token=kwargs.get("api_token"),
            base_url=kwargs.get("base_url"),
@@ -1084,8 +1107,10 @@ class LlmConfig:
            **kwargs: Key-value pairs of configuration options to update
 
        Returns:
-           LLMConfig: A new instance with the specified updates
+           llm_config: A new instance with the specified updates
        """
        config_dict = self.to_dict()
        config_dict.update(kwargs)
-       return LlmConfig.from_kwargs(config_dict)
+       return LLMConfig.from_kwargs(config_dict)
 
 
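The `clone()` pattern in the class above (serialize, overlay overrides, rebuild via `from_kwargs`) can be exercised with a standalone stand-in. The class below is a simplified sketch, not the real `LLMConfig`: the field names mirror the diff, but the default provider is a placeholder for `DEFAULT_PROVIDER`.

```python
# Minimal stand-in for the LLMConfig clone/from_kwargs round-trip.
class LLMConfigSketch:
    def __init__(self, provider="openai/gpt-4o", api_token=None, base_url=None):
        self.provider = provider
        self.api_token = api_token
        self.base_url = base_url

    def to_dict(self):
        return {"provider": self.provider,
                "api_token": self.api_token,
                "base_url": self.base_url}

    @staticmethod
    def from_kwargs(kwargs: dict) -> "LLMConfigSketch":
        return LLMConfigSketch(
            provider=kwargs.get("provider", "openai/gpt-4o"),
            api_token=kwargs.get("api_token"),
            base_url=kwargs.get("base_url"),
        )

    def clone(self, **kwargs) -> "LLMConfigSketch":
        # Serialize current state, overlay overrides, rebuild a new instance.
        config_dict = self.to_dict()
        config_dict.update(kwargs)
        return LLMConfigSketch.from_kwargs(config_dict)

base = LLMConfigSketch(provider="ollama/qwen2")
patched = base.clone(api_token="no-token")
print(patched.provider, patched.api_token)  # ollama/qwen2 no-token
```

The original is left untouched; `clone()` returns a fresh instance, which keeps configs immutable by convention.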

crawl4ai/async_crawler_strategy.py

@@ -767,6 +767,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
        # Handle wait_for condition
        # Todo: Decide how to handle this
        if not config.wait_for and config.css_selector and False:
+       # if not config.wait_for and config.css_selector:
            config.wait_for = f"css:{config.css_selector}"
 
        if config.wait_for:
@@ -806,8 +807,28 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
if config.remove_overlay_elements:
|
if config.remove_overlay_elements:
|
||||||
await self.remove_overlay_elements(page)
|
await self.remove_overlay_elements(page)
|
||||||
|
|
||||||
# Get final HTML content
|
if config.css_selector:
|
||||||
html = await page.content()
|
try:
|
||||||
|
# Handle comma-separated selectors by splitting them
|
||||||
|
selectors = [s.strip() for s in config.css_selector.split(',')]
|
||||||
|
html_parts = []
|
||||||
|
|
||||||
|
for selector in selectors:
|
||||||
|
try:
|
||||||
|
content = await page.evaluate(f"document.querySelector('{selector}')?.outerHTML || ''")
|
||||||
|
html_parts.append(content)
|
||||||
|
except Error as e:
|
||||||
|
print(f"Warning: Could not get content for selector '{selector}': {str(e)}")
|
||||||
|
|
||||||
|
# Wrap in a div to create a valid HTML structure
|
||||||
|
html = f"<div class='crawl4ai-result'>\n" + "\n".join(html_parts) + "\n</div>"
|
||||||
|
except Error as e:
|
||||||
|
raise RuntimeError(f"Failed to extract HTML content: {str(e)}")
|
||||||
|
else:
|
||||||
|
html = await page.content()
|
||||||
|
|
||||||
|
# # Get final HTML content
|
||||||
|
# html = await page.content()
|
||||||
await self.execute_hook(
|
await self.execute_hook(
|
||||||
"before_return_html", page=page, html=html, context=context, config=config
|
"before_return_html", page=page, html=html, context=context, config=config
|
||||||
)
|
)
|
||||||
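The new `css_selector` branch splits comma-separated selectors, extracts each match's `outerHTML`, and wraps the fragments in a container div. A sketch of that logic with the `page.evaluate()` call stubbed out by a dict lookup (`fake_dom` and `extract_by_selectors` are illustrative names, not part of the library):

```python
# Sketch of the comma-separated selector handling, with Playwright's
# page.evaluate() replaced by a plain dict lookup for illustration.
fake_dom = {
    "h1": "<h1>Title</h1>",
    ".article": "<div class='article'>Body</div>",
}

def extract_by_selectors(css_selector: str) -> str:
    # Split "h1, .article" into individual selectors, as the new code does.
    selectors = [s.strip() for s in css_selector.split(",")]
    html_parts = []
    for selector in selectors:
        # Real code: await page.evaluate(
        #     f"document.querySelector('{selector}')?.outerHTML || ''")
        html_parts.append(fake_dom.get(selector, ""))
    # Wrap in a div so the joined fragments form a single valid HTML tree.
    return "<div class='crawl4ai-result'>\n" + "\n".join(html_parts) + "\n</div>"

html = extract_by_selectors("h1, .article")
```

The wrapping div matters because concatenated fragments alone would not parse as one well-formed document for downstream scraping.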
@@ -4,10 +4,10 @@ import aiosqlite
 import asyncio
 from typing import Optional, Dict
 from contextlib import asynccontextmanager
-import logging
 import json  # Added for serialization/deserialization
 from .utils import ensure_content_dirs, generate_content_hash
 from .models import CrawlResult, MarkdownGenerationResult, StringCompatibleMarkdown
+# , StringCompatibleMarkdown
 import aiofiles
 from .utils import VersionManager
 from .async_logger import AsyncLogger
@@ -13,7 +13,7 @@ from rich.live import Live
 from rich.table import Table
 from rich.console import Console
 from rich import box
-from datetime import timedelta
+from datetime import timedelta, datetime
 from collections.abc import AsyncGenerator
 import time
 import psutil
@@ -24,6 +24,8 @@ from urllib.parse import urlparse
 import random
 from abc import ABC, abstractmethod
 
+from math import inf as infinity
+
 
 class RateLimiter:
     def __init__(
@@ -250,7 +252,7 @@ class CrawlerMonitor:
                 key=lambda x: (
                     x.status != CrawlStatus.IN_PROGRESS,
                     x.status != CrawlStatus.QUEUED,
-                    x.end_time or float('inf'),
+                    x.end_time or infinity,
                 ),
             )[: self.max_visible_rows]
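Swapping `float('inf')` for `math.inf` (imported as `infinity`) is purely cosmetic; both are the same value, and tasks without an `end_time` still sort to the end of their status group. A small sketch of the sort key (the tuples are toy stand-ins for the monitor's task stats):

```python
from math import inf as infinity

# math.inf and float('inf') are the same value; the named import just reads better.
tasks = [("a", 12.5), ("b", None), ("c", 3.1)]

# Mirrors the monitor's sort key: a missing end_time becomes +inf and sorts last.
ordered = sorted(tasks, key=lambda t: t[1] or infinity)
```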
@@ -4,7 +4,7 @@ import sys
 import time
 from colorama import Fore
 from pathlib import Path
-from typing import Optional, List
+from typing import Optional, List, Generic, TypeVar
 import json
 import asyncio
 
@@ -23,7 +23,7 @@ from .async_crawler_strategy import (
     AsyncPlaywrightCrawlerStrategy,
     AsyncCrawlResponse,
 )
-from .cache_context import CacheMode, CacheContext, _legacy_to_cache_mode
+from .cache_context import CacheMode, CacheContext
 from .markdown_generation_strategy import (
     DefaultMarkdownGenerator,
     MarkdownGenerationStrategy,
@@ -44,17 +44,46 @@ from .utils import (
     RobotsParser,
 )
 
-from typing import Union, AsyncGenerator, TypeVar
+from typing import Union, AsyncGenerator
 
 CrawlResultT = TypeVar('CrawlResultT', bound=CrawlResult)
-RunManyReturn = Union[CrawlResultT, List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
+# RunManyReturn = Union[CrawlResultT, List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
 
-DeepCrawlSingleReturn = Union[List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
-DeepCrawlManyReturn = Union[
-    List[List[CrawlResultT]],
-    AsyncGenerator[CrawlResultT, None],
+class CrawlResultContainer(Generic[CrawlResultT]):
+    def __init__(self, results: Union[CrawlResultT, List[CrawlResultT]]):
+        # Normalize to a list
+        if isinstance(results, list):
+            self._results = results
+        else:
+            self._results = [results]
+
+    def __iter__(self):
+        return iter(self._results)
+
+    def __getitem__(self, index):
+        return self._results[index]
+
+    def __len__(self):
+        return len(self._results)
+
+    def __getattr__(self, attr):
+        # Delegate attribute access to the first element.
+        if self._results:
+            return getattr(self._results[0], attr)
+        raise AttributeError(f"{self.__class__.__name__} object has no attribute '{attr}'")
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self._results!r})"
+
+# Redefine the union type. Now synchronous calls always return a container,
+# while stream mode is handled with an AsyncGenerator.
+RunManyReturn = Union[
+    CrawlResultContainer[CrawlResultT],
+    AsyncGenerator[CrawlResultT, None]
 ]
 
 
 class AsyncWebCrawler:
     """
     Asynchronous web crawler with flexible caching capabilities.
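The key property of `CrawlResultContainer` is backward compatibility: callers that treated `arun()`'s return value as a single `CrawlResult` keep working, because attribute access falls through to the first element, while list-style iteration and indexing also work. A self-contained sketch of that behavior (`FakeResult` stands in for `CrawlResult`):

```python
from typing import Generic, List, TypeVar, Union

T = TypeVar("T")

class CrawlResultContainer(Generic[T]):
    """List-like wrapper; attribute access falls through to the first result."""

    def __init__(self, results: Union[T, List[T]]):
        # Normalize a single result or a list of results to a list.
        self._results = results if isinstance(results, list) else [results]

    def __iter__(self):
        return iter(self._results)

    def __getitem__(self, index):
        return self._results[index]

    def __len__(self):
        return len(self._results)

    def __getattr__(self, attr):
        # Only called when normal lookup fails; delegate to the first element.
        if self._results:
            return getattr(self._results[0], attr)
        raise AttributeError(f"{self.__class__.__name__} object has no attribute '{attr}'")

class FakeResult:
    def __init__(self, url):
        self.url = url

container = CrawlResultContainer(FakeResult("https://example.com"))
```

Note that `__getattr__` is safe against recursion here because `_results` is set in `__init__` and found by normal attribute lookup.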
@@ -222,24 +251,7 @@ class AsyncWebCrawler:
     async def arun(
         self,
         url: str,
-        config: Optional[CrawlerRunConfig] = None,
+        config: CrawlerRunConfig = None,
-        # Legacy parameters maintained for backwards compatibility
-        word_count_threshold=MIN_WORD_THRESHOLD,
-        extraction_strategy: ExtractionStrategy = None,
-        chunking_strategy: ChunkingStrategy = RegexChunking(),
-        content_filter: RelevantContentFilter = None,
-        cache_mode: Optional[CacheMode] = None,
-        # Deprecated cache parameters
-        bypass_cache: bool = False,
-        disable_cache: bool = False,
-        no_cache_read: bool = False,
-        no_cache_write: bool = False,
-        # Other legacy parameters
-        css_selector: str = None,
-        screenshot: bool = False,
-        pdf: bool = False,
-        user_agent: str = None,
-        verbose=True,
         **kwargs,
     ) -> RunManyReturn:
         """
@@ -270,45 +282,13 @@ class AsyncWebCrawler:
         Returns:
             CrawlResult: The result of crawling and processing
         """
-        crawler_config = config
+        config = config or CrawlerRunConfig()
         if not isinstance(url, str) or not url:
             raise ValueError("Invalid URL, make sure the URL is a non-empty string")
 
         async with self._lock or self.nullcontext():
             try:
-                # Handle configuration
-                if crawler_config is not None:
-                    config = crawler_config
-                else:
-                    # Merge all parameters into a single kwargs dict for config creation
-                    config_kwargs = {
-                        "word_count_threshold": word_count_threshold,
-                        "extraction_strategy": extraction_strategy,
-                        "chunking_strategy": chunking_strategy,
-                        "content_filter": content_filter,
-                        "cache_mode": cache_mode,
-                        "bypass_cache": bypass_cache,
-                        "disable_cache": disable_cache,
-                        "no_cache_read": no_cache_read,
-                        "no_cache_write": no_cache_write,
-                        "css_selector": css_selector,
-                        "screenshot": screenshot,
-                        "pdf": pdf,
-                        "verbose": verbose,
-                        **kwargs,
-                    }
-                    config = CrawlerRunConfig.from_kwargs(config_kwargs)
-
-                # Handle deprecated cache parameters
-                if any([bypass_cache, disable_cache, no_cache_read, no_cache_write]):
-                    # Convert legacy parameters if cache_mode not provided
-                    if config.cache_mode is None:
-                        config.cache_mode = _legacy_to_cache_mode(
-                            disable_cache=disable_cache,
-                            bypass_cache=bypass_cache,
-                            no_cache_read=no_cache_read,
-                            no_cache_write=no_cache_write,
-                        )
+                self.logger.verbose = config.verbose
 
                 # Default to ENABLED if no cache mode specified
                 if config.cache_mode is None:
@@ -344,7 +324,11 @@ class AsyncWebCrawler:
                     # If screenshot is requested but its not in cache, then set cache_result to None
                     screenshot_data = cached_result.screenshot
                     pdf_data = cached_result.pdf
-                    if config.screenshot and not screenshot or config.pdf and not pdf:
+                    # if config.screenshot and not screenshot or config.pdf and not pdf:
+                    if config.screenshot and not screenshot_data:
+                        cached_result = None
+
+                    if config.pdf and not pdf_data:
                         cached_result = None
 
                     self.logger.url_status(
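The replaced condition referenced the removed legacy names `screenshot` and `pdf`, and its combined form is also easy to misread: `and` binds tighter than `or`, so it parses as `(config.screenshot and not screenshot_data) or (config.pdf and not pdf_data)`. The split version makes each cache-invalidation rule explicit. A sketch of the equivalence (`should_drop_cached` is a hypothetical helper, not library code):

```python
# 'and' binds tighter than 'or'; splitting the test makes each rule explicit.
def should_drop_cached(want_screenshot, screenshot_data, want_pdf, pdf_data):
    cached = object()  # stand-in for cached_result
    if want_screenshot and not screenshot_data:
        cached = None
    if want_pdf and not pdf_data:
        cached = None
    return cached is None

# Screenshot requested but missing from cache -> drop the cached result.
drop = should_drop_cached(True, None, False, None)
# Screenshot requested and present -> keep the cached result.
keep = should_drop_cached(True, b"png-bytes", False, None)
```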
@@ -358,12 +342,11 @@ class AsyncWebCrawler:
                 if config and config.proxy_rotation_strategy:
                     next_proxy = await config.proxy_rotation_strategy.get_next_proxy()
                     if next_proxy:
-                        if verbose:
-                            self.logger.info(
-                                message="Switch proxy: {proxy}",
-                                tag="PROXY",
-                                params={"proxy": next_proxy.server},
-                            )
+                        self.logger.info(
+                            message="Switch proxy: {proxy}",
+                            tag="PROXY",
+                            params={"proxy": next_proxy.server},
+                        )
                         config.proxy_config = next_proxy
                         # config = config.clone(proxy_config=next_proxy)
@@ -371,8 +354,8 @@ class AsyncWebCrawler:
                 if not cached_result or not html:
                     t1 = time.perf_counter()
 
-                    if user_agent:
-                        self.crawler_strategy.update_user_agent(user_agent)
+                    if config.user_agent:
+                        self.crawler_strategy.update_user_agent(config.user_agent)
 
                     # Check robots.txt if enabled
                     if config and config.check_robots_txt:
@@ -452,7 +435,7 @@ class AsyncWebCrawler:
                     if cache_context.should_write() and not bool(cached_result):
                         await async_db_manager.acache_url(crawl_result)
 
-                    return crawl_result
+                    return CrawlResultContainer(crawl_result)
 
                 else:
                     self.logger.success(
@@ -469,7 +452,7 @@ class AsyncWebCrawler:
                     cached_result.success = bool(html)
                     cached_result.session_id = getattr(config, "session_id", None)
                     cached_result.redirected_url = cached_result.redirected_url or url
-                    return cached_result
+                    return CrawlResultContainer(cached_result)
 
             except Exception as e:
                 error_context = get_error_context(sys.exc_info())
@@ -487,8 +470,10 @@ class AsyncWebCrawler:
                     tag="ERROR",
                 )
 
-                return CrawlResult(
-                    url=url, html="", success=False, error_message=error_message
+                return CrawlResultContainer(
+                    CrawlResult(
+                        url=url, html="", success=False, error_message=error_message
+                    )
                 )
 
     async def aprocess_html(
@@ -529,7 +514,8 @@ class AsyncWebCrawler:
         scraping_strategy.logger = self.logger
 
         # Process HTML content
-        params = {k: v for k, v in config.to_dict().items() if k not in ["url"]}
+        params = config.__dict__.copy()
+        params.pop("url", None)
         # add keys from kwargs to params that doesn't exist in params
         params.update({k: v for k, v in kwargs.items() if k not in params.keys()})
 
@@ -584,9 +570,9 @@ class AsyncWebCrawler:
 
         # Log processing completion
         self.logger.info(
-            message="Processed {url:.50}... | Time: {timing}ms",
+            message="{url:.50}... | Time: {timing}s",
             tag="SCRAPE",
-            params={"url": _url, "timing": int((time.perf_counter() - t1) * 1000)},
+            params={"url": _url, "timing": int((time.perf_counter() - t1) * 1000) / 1000},
        )
 
         ################################
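The parameter merge above has a deliberate precedence: config attributes win, `url` is dropped because it is passed separately, and `kwargs` only fill keys the config does not already define. A sketch of that order (`FakeConfig` is an illustrative stand-in for `CrawlerRunConfig`):

```python
# Sketch of the parameter-merge order used in aprocess_html: config wins,
# kwargs only supply keys the config does not already define.
class FakeConfig:
    def __init__(self):
        self.url = "https://example.com"
        self.word_count_threshold = 10

config = FakeConfig()
kwargs = {"word_count_threshold": 99, "screenshot": True}

params = config.__dict__.copy()
params.pop("url", None)  # the url is passed separately, not as a strategy param
params.update({k: v for k, v in kwargs.items() if k not in params.keys()})
```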
@@ -664,17 +650,17 @@ class AsyncWebCrawler:
         config: Optional[CrawlerRunConfig] = None,
         dispatcher: Optional[BaseDispatcher] = None,
         # Legacy parameters maintained for backwards compatibility
-        word_count_threshold=MIN_WORD_THRESHOLD,
-        extraction_strategy: ExtractionStrategy = None,
-        chunking_strategy: ChunkingStrategy = RegexChunking(),
-        content_filter: RelevantContentFilter = None,
-        cache_mode: Optional[CacheMode] = None,
-        bypass_cache: bool = False,
-        css_selector: str = None,
-        screenshot: bool = False,
-        pdf: bool = False,
-        user_agent: str = None,
-        verbose=True,
+        # word_count_threshold=MIN_WORD_THRESHOLD,
+        # extraction_strategy: ExtractionStrategy = None,
+        # chunking_strategy: ChunkingStrategy = RegexChunking(),
+        # content_filter: RelevantContentFilter = None,
+        # cache_mode: Optional[CacheMode] = None,
+        # bypass_cache: bool = False,
+        # css_selector: str = None,
+        # screenshot: bool = False,
+        # pdf: bool = False,
+        # user_agent: str = None,
+        # verbose=True,
         **kwargs
     ) -> RunManyReturn:
         """
@@ -707,20 +693,21 @@ class AsyncWebCrawler:
         ):
             print(f"Processed {result.url}: {len(result.markdown)} chars")
         """
-        if config is None:
-            config = CrawlerRunConfig(
-                word_count_threshold=word_count_threshold,
-                extraction_strategy=extraction_strategy,
-                chunking_strategy=chunking_strategy,
-                content_filter=content_filter,
-                cache_mode=cache_mode,
-                bypass_cache=bypass_cache,
-                css_selector=css_selector,
-                screenshot=screenshot,
-                pdf=pdf,
-                verbose=verbose,
-                **kwargs,
-            )
+        config = config or CrawlerRunConfig()
+        # if config is None:
+        #     config = CrawlerRunConfig(
+        #         word_count_threshold=word_count_threshold,
+        #         extraction_strategy=extraction_strategy,
+        #         chunking_strategy=chunking_strategy,
+        #         content_filter=content_filter,
+        #         cache_mode=cache_mode,
+        #         bypass_cache=bypass_cache,
+        #         css_selector=css_selector,
+        #         screenshot=screenshot,
+        #         pdf=pdf,
+        #         verbose=verbose,
+        #         **kwargs,
+        #     )
 
         if dispatcher is None:
             dispatcher = MemoryAdaptiveDispatcher(
@@ -74,6 +74,7 @@ class ManagedBrowser:
         _get_browser_args(): Returns browser-specific command line arguments.
         _get_user_data_dir(): Returns the user data directory path.
         _cleanup(): Terminates the browser process and removes the temporary directory.
+        create_profile(): Static method to create a user profile by launching a browser for user interaction.
     """
 
     browser_type: str
@@ -288,6 +289,80 @@ class ManagedBrowser:
                 tag="ERROR",
                 params={"error": str(e)},
             )
+
+    # These methods have been moved to BrowserProfiler class
+    @staticmethod
+    async def create_profile(browser_config=None, profile_name=None, logger=None):
+        """
+        This method has been moved to the BrowserProfiler class.
+
+        Creates a browser profile by launching a browser for interactive user setup
+        and waits until the user closes it. The profile is stored in a directory that
+        can be used later with BrowserConfig.user_data_dir.
+
+        Please use BrowserProfiler.create_profile() instead.
+
+        Example:
+        ```python
+        from crawl4ai.browser_profiler import BrowserProfiler
+
+        profiler = BrowserProfiler()
+        profile_path = await profiler.create_profile(profile_name="my-login-profile")
+        ```
+        """
+        from .browser_profiler import BrowserProfiler
+
+        # Create a BrowserProfiler instance and delegate to it
+        profiler = BrowserProfiler(logger=logger)
+        return await profiler.create_profile(profile_name=profile_name, browser_config=browser_config)
+
+    @staticmethod
+    def list_profiles():
+        """
+        This method has been moved to the BrowserProfiler class.
+
+        Lists all available browser profiles in the Crawl4AI profiles directory.
+
+        Please use BrowserProfiler.list_profiles() instead.
+
+        Example:
+        ```python
+        from crawl4ai.browser_profiler import BrowserProfiler
+
+        profiler = BrowserProfiler()
+        profiles = profiler.list_profiles()
+        ```
+        """
+        from .browser_profiler import BrowserProfiler
+
+        # Create a BrowserProfiler instance and delegate to it
+        profiler = BrowserProfiler()
+        return profiler.list_profiles()
+
+    @staticmethod
+    def delete_profile(profile_name_or_path):
+        """
+        This method has been moved to the BrowserProfiler class.
+
+        Delete a browser profile by name or path.
+
+        Please use BrowserProfiler.delete_profile() instead.
+
+        Example:
+        ```python
+        from crawl4ai.browser_profiler import BrowserProfiler
+
+        profiler = BrowserProfiler()
+        success = profiler.delete_profile("my-profile")
+        ```
+        """
+        from .browser_profiler import BrowserProfiler
+
+        # Create a BrowserProfiler instance and delegate to it
+        profiler = BrowserProfiler()
+        return profiler.delete_profile(profile_name_or_path)
+
+
 class BrowserManager:
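The methods above follow a standard deprecation-by-delegation pattern: `ManagedBrowser` keeps the old static entry points as thin wrappers so existing callers keep working after the functionality moves to `BrowserProfiler`. A self-contained sketch of the pattern (classes reduced to stubs; the returned profile names are illustrative data):

```python
# Sketch of deprecation-by-delegation: the old static method survives as a
# thin wrapper around the class that now owns the functionality.
class BrowserProfiler:
    def list_profiles(self):
        return ["profile_a", "profile_b"]  # illustrative data

class ManagedBrowser:
    @staticmethod
    def list_profiles():
        # Deprecated entry point: delegate to the new home of this logic.
        profiler = BrowserProfiler()
        return profiler.list_profiles()

profiles = ManagedBrowser.list_profiles()
```

The real code does the `from .browser_profiler import BrowserProfiler` inside the method body, which also avoids a circular import between the two modules.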
@@ -304,6 +379,7 @@ class BrowserManager:
         sessions (dict): Dictionary to store session information
         session_ttl (int): Session timeout in seconds
     """
+
 
     def __init__(self, browser_config: BrowserConfig, logger=None):
         """
@@ -358,8 +434,9 @@ class BrowserManager:
 
         self.playwright = await async_playwright().start()
 
-        if self.config.use_managed_browser:
-            cdp_url = await self.managed_browser.start()
+        if self.config.cdp_url or self.config.use_managed_browser:
+            self.config.use_managed_browser = True
+            cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url
             self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
             contexts = self.browser.contexts
             if contexts:
@@ -454,9 +531,9 @@ class BrowserManager:
                 ProxySettings(server=self.config.proxy)
                 if self.config.proxy
                 else ProxySettings(
-                    server=self.config.proxy_config.get("server"),
-                    username=self.config.proxy_config.get("username"),
-                    password=self.config.proxy_config.get("password"),
+                    server=self.config.proxy_config.server,
+                    username=self.config.proxy_config.username,
+                    password=self.config.proxy_config.password,
                 )
             )
             browser_args["proxy"] = proxy_settings
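The second hunk reflects `proxy_config` changing from a plain dict (read via `.get("server")`) to an object with attributes. A sketch of the new access shape, using a dataclass as an illustrative stand-in for whatever proxy-config type the library now uses:

```python
from dataclasses import dataclass
from typing import Optional

# Illustrative stand-in: proxy_config is now an object with attributes,
# not a dict accessed via .get("server").
@dataclass
class ProxyConfig:
    server: str
    username: Optional[str] = None
    password: Optional[str] = None

proxy = ProxyConfig(server="http://127.0.0.1:8080", username="u", password="p")

# Mirrors how the attributes feed into the browser's proxy settings.
browser_args = {
    "proxy": {
        "server": proxy.server,
        "username": proxy.username,
        "password": proxy.password,
    }
}
```

Attribute access also fails loudly on a typo (`AttributeError`) where `dict.get` would silently return `None`.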
@@ -714,7 +791,10 @@ class BrowserManager:
         # If using a managed browser, just grab the shared default_context
         if self.config.use_managed_browser:
             context = self.default_context
-            page = await context.new_page()
+            pages = context.pages
+            page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
+            if not page:
+                page = await context.new_page()
         else:
             # Otherwise, check if we have an existing context for this config
             config_signature = self._make_config_signature(crawlerRunConfig)
@@ -764,6 +844,9 @@ class BrowserManager:
 
     async def close(self):
         """Close all browser resources and clean up."""
+        if self.config.cdp_url:
+            return
+
         if self.config.sleep_on_close:
             await asyncio.sleep(0.5)
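The page-reuse change above scans the context's existing tabs for one already at the target URL before opening a new page. A sketch of that lookup with `next()` and a generator expression (`FakePage` stands in for a Playwright `Page`):

```python
# Sketch of the page-reuse lookup: find an existing tab whose URL matches
# before opening a new one. FakePage stands in for a Playwright Page.
class FakePage:
    def __init__(self, url):
        self.url = url

pages = [FakePage("about:blank"), FakePage("https://example.com")]
target = "https://example.com"

# next(..., None) returns the first match or the default without raising.
page = next((p for p in pages if p.url == target), None)
if page is None:
    page = FakePage(target)  # real code: page = await context.new_page()
```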
crawl4ai/browser_profiler.py (new file, 769 lines)
@@ -0,0 +1,769 @@
+"""
+Browser Profiler Module
+
+This module provides a dedicated class for managing browser profiles
+that can be used for identity-based crawling with Crawl4AI.
+"""
+
+import os
+import asyncio
+import signal
+import sys
+import datetime
+import uuid
+import shutil
+from typing import List, Dict, Optional, Any
+from colorama import Fore, Style, init
+
+from .async_configs import BrowserConfig
+from .browser_manager import ManagedBrowser
+from .async_logger import AsyncLogger, AsyncLoggerBase
+from .utils import get_home_folder
+
+
+class BrowserProfiler:
+    """
+    A dedicated class for managing browser profiles for Crawl4AI.
+
+    The BrowserProfiler allows you to:
+    - Create browser profiles interactively
+    - List available profiles
+    - Delete profiles when no longer needed
+    - Get profile paths for use in BrowserConfig
+
+    Profiles are stored by default in ~/.crawl4ai/profiles/
+    """
+
+    def __init__(self, logger: Optional[AsyncLoggerBase] = None):
+        """
+        Initialize the BrowserProfiler.
+
+        Args:
+            logger (AsyncLoggerBase, optional): Logger for outputting messages.
+                If None, a default AsyncLogger will be created.
+        """
+        # Initialize colorama for colorful terminal output
+        init()
+
+        # Create a logger if not provided
+        if logger is None:
+            self.logger = AsyncLogger(verbose=True)
+        elif not isinstance(logger, AsyncLoggerBase):
+            self.logger = AsyncLogger(verbose=True)
+        else:
+            self.logger = logger
+
+        # Ensure profiles directory exists
+        self.profiles_dir = os.path.join(get_home_folder(), "profiles")
+        os.makedirs(self.profiles_dir, exist_ok=True)
+
+    async def create_profile(self,
+                             profile_name: Optional[str] = None,
+                             browser_config: Optional[BrowserConfig] = None) -> Optional[str]:
+        """
+        Creates a browser profile by launching a browser for interactive user setup
+        and waits until the user closes it. The profile is stored in a directory that
+        can be used later with BrowserConfig.user_data_dir.
+
+        Args:
+            profile_name (str, optional): Name for the profile directory.
+                If None, a name is generated based on timestamp.
+            browser_config (BrowserConfig, optional): Configuration for the browser.
+                If None, a default configuration is used with headless=False.
+
+        Returns:
+            str: Path to the created profile directory, or None if creation failed
+
+        Example:
+            ```python
+            profiler = BrowserProfiler()
+
+            # Create a profile interactively
+            profile_path = await profiler.create_profile(
+                profile_name="my-login-profile"
+            )
+
+            # Use the profile in a crawler
+            browser_config = BrowserConfig(
+                headless=True,
+                use_managed_browser=True,
+                user_data_dir=profile_path
+            )
+
+            async with AsyncWebCrawler(config=browser_config) as crawler:
+                # The crawler will now use your profile with all your cookies and login state
+                result = await crawler.arun("https://example.com/dashboard")
+            ```
+        """
+        # Create default browser config if none provided
+        if browser_config is None:
+            from .async_configs import BrowserConfig
+            browser_config = BrowserConfig(
+                browser_type="chromium",
+                headless=False,  # Must be visible for user interaction
+                verbose=True
+            )
+        else:
+            # Ensure headless is False for user interaction
+            browser_config.headless = False
+
+        # Generate profile name if not provided
+        if not profile_name:
+            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+            profile_name = f"profile_{timestamp}_{uuid.uuid4().hex[:6]}"
+
+        # Sanitize profile name (replace spaces and special chars)
+        profile_name = "".join(c if c.isalnum() or c in "-_" else "_" for c in profile_name)
+
+        # Set user data directory
+        profile_path = os.path.join(self.profiles_dir, profile_name)
+        os.makedirs(profile_path, exist_ok=True)
+
+        # Print instructions for the user with colorama formatting
+        border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
+        self.logger.info(f"\n{border}", tag="PROFILE")
+        self.logger.info(f"Creating browser profile: {Fore.GREEN}{profile_name}{Style.RESET_ALL}", tag="PROFILE")
+        self.logger.info(f"Profile directory: {Fore.YELLOW}{profile_path}{Style.RESET_ALL}", tag="PROFILE")
+
+        self.logger.info("\nInstructions:", tag="PROFILE")
+        self.logger.info("1. A browser window will open for you to set up your profile.", tag="PROFILE")
+        self.logger.info(f"2. {Fore.CYAN}Log in to websites{Style.RESET_ALL}, configure settings, etc. as needed.", tag="PROFILE")
+        self.logger.info(f"3. When you're done, {Fore.YELLOW}press 'q' in this terminal{Style.RESET_ALL} to close the browser.", tag="PROFILE")
+        self.logger.info("4. The profile will be saved and ready to use with Crawl4AI.", tag="PROFILE")
+        self.logger.info(f"{border}\n", tag="PROFILE")
+
+        # Create managed browser instance
+        managed_browser = ManagedBrowser(
+            browser_type=browser_config.browser_type,
+            user_data_dir=profile_path,
+            headless=False,  # Must be visible
+            logger=self.logger,
+            debugging_port=browser_config.debugging_port
+        )
+
+        # Set up signal handlers to ensure cleanup on interrupt
+        original_sigint = signal.getsignal(signal.SIGINT)
+        original_sigterm = signal.getsignal(signal.SIGTERM)
+
+        # Define cleanup handler for signals
+        async def cleanup_handler(sig, frame):
+            self.logger.warning("\nCleaning up browser process...", tag="PROFILE")
+            await managed_browser.cleanup()
+            # Restore original signal handlers
+            signal.signal(signal.SIGINT, original_sigint)
+            signal.signal(signal.SIGTERM, original_sigterm)
+            if sig == signal.SIGINT:
+                self.logger.error("Profile creation interrupted. Profile may be incomplete.", tag="PROFILE")
+                sys.exit(1)
+
+        # Set signal handlers
+        def sigint_handler(sig, frame):
+            asyncio.create_task(cleanup_handler(sig, frame))
+
+        signal.signal(signal.SIGINT, sigint_handler)
+        signal.signal(signal.SIGTERM, sigint_handler)
+
+        # Event to signal when user is done with the browser
+        user_done_event = asyncio.Event()
+
+        # Run keyboard input loop in a separate task
+        async def listen_for_quit_command():
+            import termios
+            import tty
+            import select
+
+            # First output the prompt
+            self.logger.info(f"{Fore.CYAN}Press '{Fore.WHITE}q{Fore.CYAN}' when you've finished using the browser...{Style.RESET_ALL}", tag="PROFILE")
+
+            # Save original terminal settings
+            fd = sys.stdin.fileno()
+            old_settings = termios.tcgetattr(fd)
+
+            try:
+                # Switch to non-canonical mode (no line buffering)
+                tty.setcbreak(fd)
+
+                while True:
+                    # Check if input is available (non-blocking)
+                    readable, _, _ = select.select([sys.stdin], [], [], 0.5)
+                    if readable:
+                        key = sys.stdin.read(1)
+                        if key.lower() == 'q':
+                            self.logger.info(f"{Fore.GREEN}Closing browser and saving profile...{Style.RESET_ALL}", tag="PROFILE")
+                            user_done_event.set()
+                            return
+
+                    # Check if the browser process has already exited
+                    if managed_browser.browser_process and managed_browser.browser_process.poll() is not None:
+                        self.logger.info("Browser already closed. Ending input listener.", tag="PROFILE")
+                        user_done_event.set()
+                        return
+
+                    await asyncio.sleep(0.1)
+
+            finally:
||||||
|
# Restore terminal settings
|
||||||
|
termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Start the browser
|
||||||
|
await managed_browser.start()
|
||||||
|
|
||||||
|
# Check if browser started successfully
|
||||||
|
browser_process = managed_browser.browser_process
|
||||||
|
if not browser_process:
|
||||||
|
self.logger.error("Failed to start browser process.", tag="PROFILE")
|
||||||
|
return None
|
||||||
|
|
||||||
|
self.logger.info(f"Browser launched. {Fore.CYAN}Waiting for you to finish...{Style.RESET_ALL}", tag="PROFILE")
|
||||||
|
|
||||||
|
# Start listening for keyboard input
|
||||||
|
listener_task = asyncio.create_task(listen_for_quit_command())
|
||||||
|
|
||||||
|
# Wait for either the user to press 'q' or for the browser process to exit naturally
|
||||||
|
while not user_done_event.is_set() and browser_process.poll() is None:
|
||||||
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
|
# Cancel the listener task if it's still running
|
||||||
|
if not listener_task.done():
|
||||||
|
listener_task.cancel()
|
||||||
|
try:
|
||||||
|
await listener_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# If the browser is still running and the user pressed 'q', terminate it
|
||||||
|
if browser_process.poll() is None and user_done_event.is_set():
|
||||||
|
self.logger.info("Terminating browser process...", tag="PROFILE")
|
||||||
|
await managed_browser.cleanup()
|
||||||
|
|
||||||
|
self.logger.success(f"Browser closed. Profile saved at: {Fore.GREEN}{profile_path}{Style.RESET_ALL}", tag="PROFILE")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error creating profile: {str(e)}", tag="PROFILE")
|
||||||
|
await managed_browser.cleanup()
|
||||||
|
return None
|
||||||
|
finally:
|
||||||
|
# Restore original signal handlers
|
||||||
|
signal.signal(signal.SIGINT, original_sigint)
|
||||||
|
signal.signal(signal.SIGTERM, original_sigterm)
|
||||||
|
|
||||||
|
# Make sure browser is fully cleaned up
|
||||||
|
await managed_browser.cleanup()
|
||||||
|
|
||||||
|
# Return the profile path
|
||||||
|
return profile_path
|
||||||
|
|
||||||
|
    def list_profiles(self) -> List[Dict[str, Any]]:
        """
        Lists all available browser profiles in the Crawl4AI profiles directory.

        Returns:
            list: A list of dictionaries containing profile information:
                [{"name": "profile_name", "path": "/path/to/profile", "created": datetime, "type": "chromium|firefox"}]

        Example:
            ```python
            profiler = BrowserProfiler()

            # List all available profiles
            profiles = profiler.list_profiles()

            for profile in profiles:
                print(f"Profile: {profile['name']}")
                print(f"  Path: {profile['path']}")
                print(f"  Created: {profile['created']}")
                print(f"  Browser type: {profile['type']}")
            ```
        """
        if not os.path.exists(self.profiles_dir):
            return []

        profiles = []

        for name in os.listdir(self.profiles_dir):
            profile_path = os.path.join(self.profiles_dir, name)

            # Skip if not a directory
            if not os.path.isdir(profile_path):
                continue

            # Check if this looks like a valid browser profile
            # For Chromium: Look for Preferences file
            # For Firefox: Look for prefs.js file
            is_valid = False

            if os.path.exists(os.path.join(profile_path, "Preferences")) or \
               os.path.exists(os.path.join(profile_path, "Default", "Preferences")):
                is_valid = "chromium"
            elif os.path.exists(os.path.join(profile_path, "prefs.js")):
                is_valid = "firefox"

            if is_valid:
                # Get creation time
                created = datetime.datetime.fromtimestamp(
                    os.path.getctime(profile_path)
                )

                profiles.append({
                    "name": name,
                    "path": profile_path,
                    "created": created,
                    "type": is_valid
                })

        # Sort by creation time, newest first
        profiles.sort(key=lambda x: x["created"], reverse=True)

        return profiles

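As a quick illustration of the detection logic above, the Preferences/prefs.js checks can be exercised against a throwaway directory. `detect_profile_type` below is a hypothetical standalone mirror of the in-method checks, not part of the library:

```python
import os
import tempfile

def detect_profile_type(profile_path: str):
    """Return 'chromium', 'firefox', or None, mirroring the checks in list_profiles."""
    if os.path.exists(os.path.join(profile_path, "Preferences")) or \
       os.path.exists(os.path.join(profile_path, "Default", "Preferences")):
        return "chromium"
    if os.path.exists(os.path.join(profile_path, "prefs.js")):
        return "firefox"
    return None

# Demo: a directory containing prefs.js is treated as a Firefox profile
with tempfile.TemporaryDirectory() as d:
    open(os.path.join(d, "prefs.js"), "w").close()
    print(detect_profile_type(d))  # firefox
```

Note that the real method records the detected string in `is_valid` and uses it as the profile's `type` field, so the same truthiness check covers both "valid" and "which browser".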
    def get_profile_path(self, profile_name: str) -> Optional[str]:
        """
        Get the full path to a profile by name.

        Args:
            profile_name (str): Name of the profile (not the full path)

        Returns:
            str: Full path to the profile directory, or None if not found

        Example:
            ```python
            profiler = BrowserProfiler()

            path = profiler.get_profile_path("my-profile")
            if path:
                print(f"Profile path: {path}")
            else:
                print("Profile not found")
            ```
        """
        profile_path = os.path.join(self.profiles_dir, profile_name)

        # Check if the path exists and is a valid profile
        if not os.path.isdir(profile_path):
            # Check if profile_name itself is a full path
            if os.path.isabs(profile_name):
                profile_path = profile_name
            else:
                return None

        # Look for profile indicators
        is_profile = (
            os.path.exists(os.path.join(profile_path, "Preferences")) or
            os.path.exists(os.path.join(profile_path, "Default", "Preferences")) or
            os.path.exists(os.path.join(profile_path, "prefs.js"))
        )

        if not is_profile:
            return None  # Not a valid browser profile

        return profile_path

    def delete_profile(self, profile_name_or_path: str) -> bool:
        """
        Delete a browser profile by name or path.

        Args:
            profile_name_or_path (str): Name of the profile or full path to profile directory

        Returns:
            bool: True if the profile was deleted successfully, False otherwise

        Example:
            ```python
            profiler = BrowserProfiler()

            # Delete by name
            success = profiler.delete_profile("my-profile")

            # Delete by path
            success = profiler.delete_profile("/path/to/.crawl4ai/profiles/my-profile")
            ```
        """
        # Determine if input is a name or a path
        if os.path.isabs(profile_name_or_path):
            # Full path provided
            profile_path = profile_name_or_path
        else:
            # Just a name provided, construct path
            profile_path = os.path.join(self.profiles_dir, profile_name_or_path)

        # Check if path exists and is a valid profile
        if not os.path.isdir(profile_path):
            return False

        # Look for profile indicators
        is_profile = (
            os.path.exists(os.path.join(profile_path, "Preferences")) or
            os.path.exists(os.path.join(profile_path, "Default", "Preferences")) or
            os.path.exists(os.path.join(profile_path, "prefs.js"))
        )

        if not is_profile:
            return False  # Not a valid browser profile

        # Delete the profile directory
        try:
            shutil.rmtree(profile_path)
            return True
        except Exception:
            return False

    async def interactive_manager(self, crawl_callback=None):
        """
        Launch an interactive profile management console.

        Args:
            crawl_callback (callable, optional): Function to call when selecting the option to use
                a profile for crawling. It will be called with (profile_path, url).

        Example:
            ```python
            profiler = BrowserProfiler()

            # Define a custom crawl function
            async def my_crawl_function(profile_path, url):
                print(f"Crawling {url} with profile {profile_path}")
                # Implement your crawling logic here

            # Start the interactive manager
            await profiler.interactive_manager(crawl_callback=my_crawl_function)
            ```
        """
        while True:
            self.logger.info(f"\n{Fore.CYAN}Profile Management Options:{Style.RESET_ALL}", tag="MENU")
            self.logger.info(f"1. {Fore.GREEN}Create a new profile{Style.RESET_ALL}", tag="MENU")
            self.logger.info(f"2. {Fore.YELLOW}List available profiles{Style.RESET_ALL}", tag="MENU")
            self.logger.info(f"3. {Fore.RED}Delete a profile{Style.RESET_ALL}", tag="MENU")

            # Only show the crawl option if a callback was provided
            if crawl_callback:
                self.logger.info(f"4. {Fore.CYAN}Use a profile to crawl a website{Style.RESET_ALL}", tag="MENU")
                self.logger.info(f"5. {Fore.MAGENTA}Exit{Style.RESET_ALL}", tag="MENU")
                exit_option = "5"
            else:
                self.logger.info(f"4. {Fore.MAGENTA}Exit{Style.RESET_ALL}", tag="MENU")
                exit_option = "4"

            choice = input(f"\n{Fore.CYAN}Enter your choice (1-{exit_option}): {Style.RESET_ALL}")

            if choice == "1":
                # Create a new profile
                name = input(f"{Fore.GREEN}Enter a name for the new profile (or press Enter for an auto-generated name): {Style.RESET_ALL}")
                await self.create_profile(name or None)

            elif choice == "2":
                # List profiles
                profiles = self.list_profiles()

                if not profiles:
                    self.logger.warning("No profiles found. Create one first with option 1.", tag="PROFILES")
                    continue

                # Print profile information with colorama formatting
                self.logger.info("\nAvailable profiles:", tag="PROFILES")
                for i, profile in enumerate(profiles):
                    self.logger.info(f"[{i+1}] {Fore.CYAN}{profile['name']}{Style.RESET_ALL}", tag="PROFILES")
                    self.logger.info(f"    Path: {Fore.YELLOW}{profile['path']}{Style.RESET_ALL}", tag="PROFILES")
                    self.logger.info(f"    Created: {profile['created'].strftime('%Y-%m-%d %H:%M:%S')}", tag="PROFILES")
                    self.logger.info(f"    Browser type: {profile['type']}", tag="PROFILES")
                    self.logger.info("", tag="PROFILES")  # Empty line for spacing

            elif choice == "3":
                # Delete a profile
                profiles = self.list_profiles()
                if not profiles:
                    self.logger.warning("No profiles found to delete", tag="PROFILES")
                    continue

                # Display a numbered list
                self.logger.info(f"\n{Fore.YELLOW}Available profiles:{Style.RESET_ALL}", tag="PROFILES")
                for i, profile in enumerate(profiles):
                    self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")

                # Get the profile to delete
                profile_idx = input(f"{Fore.RED}Enter the number of the profile to delete (or 'c' to cancel): {Style.RESET_ALL}")
                if profile_idx.lower() == 'c':
                    continue

                try:
                    idx = int(profile_idx) - 1
                    if 0 <= idx < len(profiles):
                        profile_name = profiles[idx]["name"]
                        self.logger.info(f"Deleting profile: {Fore.YELLOW}{profile_name}{Style.RESET_ALL}", tag="PROFILES")

                        # Confirm deletion
                        confirm = input(f"{Fore.RED}Are you sure you want to delete this profile? (y/n): {Style.RESET_ALL}")
                        if confirm.lower() == 'y':
                            success = self.delete_profile(profiles[idx]["path"])

                            if success:
                                self.logger.success(f"Profile {Fore.GREEN}{profile_name}{Style.RESET_ALL} deleted successfully", tag="PROFILES")
                            else:
                                self.logger.error(f"Failed to delete profile {Fore.RED}{profile_name}{Style.RESET_ALL}", tag="PROFILES")
                    else:
                        self.logger.error("Invalid profile number", tag="PROFILES")
                except ValueError:
                    self.logger.error("Please enter a valid number", tag="PROFILES")

            elif choice == "4" and crawl_callback:
                # Use a profile to crawl a site
                profiles = self.list_profiles()
                if not profiles:
                    self.logger.warning("No profiles found. Create one first.", tag="PROFILES")
                    continue

                # Display a numbered list
                self.logger.info(f"\n{Fore.YELLOW}Available profiles:{Style.RESET_ALL}", tag="PROFILES")
                for i, profile in enumerate(profiles):
                    self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")

                # Get the profile to use
                profile_idx = input(f"{Fore.CYAN}Enter the number of the profile to use (or 'c' to cancel): {Style.RESET_ALL}")
                if profile_idx.lower() == 'c':
                    continue

                try:
                    idx = int(profile_idx) - 1
                    if 0 <= idx < len(profiles):
                        profile_path = profiles[idx]["path"]
                        url = input(f"{Fore.CYAN}Enter the URL to crawl: {Style.RESET_ALL}")
                        if url:
                            # Call the provided crawl callback
                            await crawl_callback(profile_path, url)
                        else:
                            self.logger.error("No URL provided", tag="CRAWL")
                    else:
                        self.logger.error("Invalid profile number", tag="PROFILES")
                except ValueError:
                    self.logger.error("Please enter a valid number", tag="PROFILES")

            elif choice == exit_option:
                # Exit
                self.logger.info("Exiting profile management", tag="MENU")
                break

            else:
                self.logger.error(f"Invalid choice. Please enter a number between 1 and {exit_option}.", tag="MENU")

    async def launch_standalone_browser(self,
                                        browser_type: str = "chromium",
                                        user_data_dir: Optional[str] = None,
                                        debugging_port: int = 9222,
                                        headless: bool = False) -> Optional[str]:
        """
        Launch a standalone browser with CDP debugging enabled and keep it running
        until the user presses 'q'. Returns and displays the CDP URL.

        Args:
            browser_type (str): Type of browser to launch ('chromium' or 'firefox')
            user_data_dir (str, optional): Path to user profile directory
            debugging_port (int): Port to use for CDP debugging
            headless (bool): Whether to run in headless mode

        Returns:
            str: CDP URL for the browser, or None if launch failed

        Example:
            ```python
            profiler = BrowserProfiler()
            cdp_url = await profiler.launch_standalone_browser(
                user_data_dir="/path/to/profile",
                debugging_port=9222
            )
            # Use cdp_url to connect to the browser
            ```
        """
        # Use the provided directory if specified, otherwise create a temporary directory
        if user_data_dir:
            # Directory is provided directly, ensure it exists
            profile_path = user_data_dir
            os.makedirs(profile_path, exist_ok=True)
        else:
            # Create a temporary profile directory
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            profile_name = f"temp_{timestamp}_{uuid.uuid4().hex[:6]}"
            profile_path = os.path.join(self.profiles_dir, profile_name)
            os.makedirs(profile_path, exist_ok=True)

        # Print initial information
        border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
        self.logger.info(f"\n{border}", tag="CDP")
        self.logger.info("Launching standalone browser with CDP debugging", tag="CDP")
        self.logger.info(f"Browser type: {Fore.GREEN}{browser_type}{Style.RESET_ALL}", tag="CDP")
        self.logger.info(f"Profile path: {Fore.YELLOW}{profile_path}{Style.RESET_ALL}", tag="CDP")
        self.logger.info(f"Debugging port: {Fore.CYAN}{debugging_port}{Style.RESET_ALL}", tag="CDP")
        self.logger.info(f"Headless mode: {Fore.CYAN}{headless}{Style.RESET_ALL}", tag="CDP")

        # Create managed browser instance
        managed_browser = ManagedBrowser(
            browser_type=browser_type,
            user_data_dir=profile_path,
            headless=headless,
            logger=self.logger,
            debugging_port=debugging_port
        )

        # Set up signal handlers to ensure cleanup on interrupt
        original_sigint = signal.getsignal(signal.SIGINT)
        original_sigterm = signal.getsignal(signal.SIGTERM)

        # Define cleanup handler for signals
        async def cleanup_handler(sig, frame):
            self.logger.warning("\nCleaning up browser process...", tag="CDP")
            await managed_browser.cleanup()
            # Restore original signal handlers
            signal.signal(signal.SIGINT, original_sigint)
            signal.signal(signal.SIGTERM, original_sigterm)
            if sig == signal.SIGINT:
                self.logger.error("Browser terminated by user.", tag="CDP")
                sys.exit(1)

        # Set signal handlers
        def sigint_handler(sig, frame):
            asyncio.create_task(cleanup_handler(sig, frame))

        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)

        # Event to signal when user wants to exit
        user_done_event = asyncio.Event()

        # Run keyboard input loop in a separate task
        async def listen_for_quit_command():
            import termios
            import tty
            import select

            # First output the prompt
            self.logger.info(f"{Fore.CYAN}Press '{Fore.WHITE}q{Fore.CYAN}' to stop the browser and exit...{Style.RESET_ALL}", tag="CDP")

            # Save original terminal settings
            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)

            try:
                # Switch to non-canonical mode (no line buffering)
                tty.setcbreak(fd)

                while True:
                    # Check if input is available (non-blocking)
                    readable, _, _ = select.select([sys.stdin], [], [], 0.5)
                    if readable:
                        key = sys.stdin.read(1)
                        if key.lower() == 'q':
                            self.logger.info(f"{Fore.GREEN}Closing browser...{Style.RESET_ALL}", tag="CDP")
                            user_done_event.set()
                            return

                    # Check if the browser process has already exited
                    if managed_browser.browser_process and managed_browser.browser_process.poll() is not None:
                        self.logger.info("Browser already closed. Ending input listener.", tag="CDP")
                        user_done_event.set()
                        return

                    await asyncio.sleep(0.1)

            finally:
                # Restore terminal settings
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)

        # Function to retrieve and display CDP JSON config
        async def get_cdp_json(port):
            import aiohttp
            cdp_url = f"http://localhost:{port}"
            json_url = f"{cdp_url}/json/version"

            try:
                async with aiohttp.ClientSession() as session:
                    # Try multiple times in case the browser is still starting up
                    for _ in range(10):
                        try:
                            async with session.get(json_url) as response:
                                if response.status == 200:
                                    data = await response.json()
                                    return cdp_url, data
                        except Exception:
                            pass

                        await asyncio.sleep(0.5)

                    return cdp_url, None
            except Exception as e:
                self.logger.error(f"Error fetching CDP JSON: {str(e)}", tag="CDP")
                return cdp_url, None

        cdp_url = None
        config_json = None

        try:
            # Start the browser
            await managed_browser.start()

            # Check if browser started successfully
            browser_process = managed_browser.browser_process
            if not browser_process:
                self.logger.error("Failed to start browser process.", tag="CDP")
                return None

            self.logger.info("Browser launched successfully. Retrieving CDP information...", tag="CDP")

            # Get CDP URL and JSON config
            cdp_url, config_json = await get_cdp_json(debugging_port)

            if cdp_url:
                self.logger.success(f"CDP URL: {Fore.GREEN}{cdp_url}{Style.RESET_ALL}", tag="CDP")

                if config_json:
                    # Display relevant CDP information
                    self.logger.info(f"Browser: {Fore.CYAN}{config_json.get('Browser', 'Unknown')}{Style.RESET_ALL}", tag="CDP")
                    self.logger.info(f"Protocol Version: {config_json.get('Protocol-Version', 'Unknown')}", tag="CDP")
                    if 'webSocketDebuggerUrl' in config_json:
                        self.logger.info(f"WebSocket URL: {Fore.GREEN}{config_json['webSocketDebuggerUrl']}{Style.RESET_ALL}", tag="CDP")
                else:
                    self.logger.warning("Could not retrieve CDP configuration JSON", tag="CDP")
            else:
                self.logger.error(f"Failed to get CDP URL on port {debugging_port}", tag="CDP")
                await managed_browser.cleanup()
                return None

            # Start listening for keyboard input
            listener_task = asyncio.create_task(listen_for_quit_command())

            # Wait for the user to press 'q' or for the browser process to exit naturally
            while not user_done_event.is_set() and browser_process.poll() is None:
                await asyncio.sleep(0.5)

            # Cancel the listener task if it's still running
            if not listener_task.done():
                listener_task.cancel()
                try:
                    await listener_task
                except asyncio.CancelledError:
                    pass

            # If the browser is still running and the user pressed 'q', terminate it
            if browser_process.poll() is None and user_done_event.is_set():
                self.logger.info("Terminating browser process...", tag="CDP")
                await managed_browser.cleanup()

            self.logger.success("Browser closed.", tag="CDP")

        except Exception as e:
            self.logger.error(f"Error launching standalone browser: {str(e)}", tag="CDP")
            await managed_browser.cleanup()
            return None
        finally:
            # Restore original signal handlers
            signal.signal(signal.SIGINT, original_sigint)
            signal.signal(signal.SIGTERM, original_sigterm)

            # Make sure browser is fully cleaned up
            await managed_browser.cleanup()

        # Return the CDP URL
        return cdp_url
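Once the CDP URL is printed, any DevTools-protocol client can attach to it. A minimal stdlib sketch (no aiohttp) that builds the same endpoints `get_cdp_json` polls and, only when a browser is actually listening, fetches `/json/version`; the function names here are illustrative, not part of the library:

```python
import json
import urllib.request

def cdp_endpoints(port: int) -> tuple:
    """Build the base CDP URL and its /json/version endpoint for a local browser."""
    cdp_url = f"http://localhost:{port}"
    return cdp_url, f"{cdp_url}/json/version"

def fetch_cdp_version(port: int, timeout: float = 2.0) -> dict:
    """Fetch browser/protocol metadata; requires a browser listening on the port."""
    _, json_url = cdp_endpoints(port)
    with urllib.request.urlopen(json_url, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))

if __name__ == "__main__":
    # Only meaningful with a browser already running, e.g. via `crwl cdp`:
    # info = fetch_cdp_version(9222)
    # print(info.get("Browser"), info.get("webSocketDebuggerUrl"))
    print(cdp_endpoints(9222)[1])  # http://localhost:9222/json/version
```

The `webSocketDebuggerUrl` field in the response is what CDP clients such as Playwright or Puppeteer connect to.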
523  crawl4ai/cli.py
@@ -1,9 +1,18 @@
 import click
 import os
-from typing import Dict, Any, Optional
+import sys
+import time
+
+import humanize
+from typing import Dict, Any, Optional, List
 import json
 import yaml
 import anyio
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.prompt import Prompt, Confirm
+
 from crawl4ai import (
     CacheMode,
     AsyncWebCrawler,
@@ -14,12 +23,16 @@ from crawl4ai import (
     JsonCssExtractionStrategy,
     JsonXPathExtractionStrategy,
     BM25ContentFilter,
-    PruningContentFilter
+    PruningContentFilter,
+    BrowserProfiler,
+    LLMConfig
 )
 from litellm import completion
 from pathlib import Path
-from crawl4ai.async_configs import LlmConfig
+
+# Initialize rich console
+console = Console()
+
 def get_global_config() -> dict:
     config_dir = Path.home() / ".crawl4ai"
@@ -172,7 +185,38 @@ def show_examples():
 # Crawler settings
 crwl https://example.com -c "css_selector=#main,delay_before_return_html=2,scan_full_page=true"
 
-4️⃣ Sample Config Files:
+4️⃣ Profile Management for Identity-Based Crawling:
+# Launch interactive profile manager
+crwl profiles
+
+# Create, list, and delete browser profiles for identity-based crawling
+# Use a profile for crawling (keeps you logged in)
+crwl https://example.com -p my-profile-name
+
+# Example: Crawl a site that requires login
+# 1. First create a profile and log in:
+crwl profiles
+# 2. Then use that profile to crawl the authenticated site:
+crwl https://site-requiring-login.com/dashboard -p my-profile-name
+
+5️⃣ CDP Mode for Browser Automation:
+# Launch browser with CDP debugging on default port 9222
+crwl cdp
+
+# Use a specific profile and custom port
+crwl cdp -p my-profile -P 9223
+
+# Launch headless browser with CDP enabled
+crwl cdp --headless
+
+# Launch in incognito mode (ignores profile)
+crwl cdp --incognito
+
+# Use the CDP URL with other tools (Puppeteer, Playwright, etc.)
+# The URL will be displayed in the terminal when the browser starts
+
+6️⃣ Sample Config Files:
+
 browser.yml:
     headless: true
@@ -230,7 +274,7 @@ llm_schema.json:
     }
 }
 
-5️⃣ Advanced Usage:
+7️⃣ Advanced Usage:
 # Combine configs with direct parameters
 crwl https://example.com -B browser.yml -b "headless=false,viewport_width=1920"
 
@@ -248,9 +292,15 @@ llm_schema.json:
     -f filter_bm25.yml \\
     -o markdown-fit
 
+# Authenticated crawling with profile
+crwl https://login-required-site.com \\
+    -p my-authenticated-profile \\
+    -c "css_selector=.dashboard-content" \\
+    -o markdown
+
 For more documentation visit: https://github.com/unclecode/crawl4ai
 
-6️⃣ Q&A with LLM:
+8️⃣ Q&A with LLM:
 # Ask a question about the content
 crwl https://example.com -q "What is the main topic discussed?"
 
@@ -277,12 +327,331 @@ For more documentation visit: https://github.com/unclecode/crawl4ai
|
|||||||
- google/gemini-pro
|
- google/gemini-pro
|
||||||
|
|
||||||
See full list of providers: https://docs.litellm.ai/docs/providers
|
See full list of providers: https://docs.litellm.ai/docs/providers
|
||||||
|
|
||||||
|
9️⃣ Profile Management:
|
||||||
|
# Launch interactive profile manager
|
||||||
|
crwl profiles
|
||||||
|
|
||||||
|
# Create a profile and use it for crawling
|
||||||
|
crwl profiles # Create and set up your profile interactively
|
||||||
|
crwl https://example.com -p my-profile-name # Use profile for crawling
|
||||||
|
|
||||||
|
# Example workflow for authenticated site
|
||||||
|
# 1. First create a profile and log in to the site:
|
||||||
|
crwl profiles # Select "Create new profile" option
|
||||||
|
# 2. Then use that profile to crawl authenticated content:
|
||||||
|
crwl https://site-requiring-login.com/dashboard -p my-profile-name
|
||||||
"""
|
"""
|
||||||
click.echo(examples)
|
click.echo(examples)
|
||||||
|
|
||||||
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
|
def get_directory_size(path: str) -> int:
|
||||||
@click.argument("url", required=False)
|
"""Calculate the total size of a directory in bytes"""
|
||||||
@click.option("--example", is_flag=True, help="Show usage examples")
|
total_size = 0
|
||||||
|
for dirpath, _, filenames in os.walk(path):
|
||||||
|
for f in filenames:
|
||||||
|
fp = os.path.join(dirpath, f)
|
||||||
|
if not os.path.islink(fp):
|
||||||
|
total_size += os.path.getsize(fp)
|
||||||
|
return total_size
|
||||||
|
|
||||||
|
def display_profiles_table(profiles: List[Dict[str, Any]]):
    """Display a rich table of browser profiles"""
    if not profiles:
        console.print(Panel("[yellow]No profiles found. Create one with the 'create' command.[/yellow]",
                            title="Browser Profiles", border_style="blue"))
        return

    table = Table(title="Browser Profiles", show_header=True, header_style="bold cyan", border_style="blue")
    table.add_column("#", style="dim", width=4)
    table.add_column("Name", style="cyan", no_wrap=True)
    table.add_column("Path", style="green")
    table.add_column("Created", style="yellow")
    table.add_column("Browser", style="magenta")
    table.add_column("Size", style="blue", justify="right")

    for i, profile in enumerate(profiles):
        # Calculate folder size
        size = get_directory_size(profile["path"])
        human_size = humanize.naturalsize(size)

        # Format creation date
        created = profile["created"].strftime("%Y-%m-%d %H:%M")

        # Add row to table
        table.add_row(
            str(i+1),
            profile["name"],
            profile["path"],
            created,
            profile["type"].capitalize(),
            human_size
        )

    console.print(table)

async def create_profile_interactive(profiler: BrowserProfiler):
    """Interactive profile creation wizard"""
    console.print(Panel("[bold cyan]Create Browser Profile[/bold cyan]\n"
                        "This will open a browser window for you to set up your identity.\n"
                        "Log in to sites, adjust settings, then press 'q' to save.",
                        border_style="cyan"))

    profile_name = Prompt.ask("[cyan]Enter profile name[/cyan]", default=f"profile_{int(time.time())}")

    console.print("[cyan]Creating profile...[/cyan]")
    console.print("[yellow]A browser window will open. After logging in to sites, press 'q' in this terminal to save.[/yellow]")

    # Create the profile
    try:
        profile_path = await profiler.create_profile(profile_name)

        if profile_path:
            console.print(f"[green]Profile successfully created at:[/green] {profile_path}")
        else:
            console.print("[red]Failed to create profile.[/red]")
    except Exception as e:
        console.print(f"[red]Error creating profile: {str(e)}[/red]")

def delete_profile_interactive(profiler: BrowserProfiler):
    """Interactive profile deletion"""
    profiles = profiler.list_profiles()

    if not profiles:
        console.print("[yellow]No profiles found to delete.[/yellow]")
        return

    # Display profiles
    display_profiles_table(profiles)

    # Get profile selection
    idx = Prompt.ask(
        "[red]Enter number of profile to delete[/red]",
        console=console,
        choices=[str(i+1) for i in range(len(profiles))],
        show_choices=False
    )

    try:
        idx = int(idx) - 1
        profile = profiles[idx]

        # Confirm deletion
        if Confirm.ask(f"[red]Are you sure you want to delete profile '{profile['name']}'?[/red]"):
            success = profiler.delete_profile(profile["path"])

            if success:
                console.print(f"[green]Profile '{profile['name']}' deleted successfully.[/green]")
            else:
                console.print(f"[red]Failed to delete profile '{profile['name']}'.[/red]")
    except (ValueError, IndexError):
        console.print("[red]Invalid selection.[/red]")

async def crawl_with_profile_cli(profile_path, url):
    """Use a profile to crawl a website via CLI"""
    console.print(f"[cyan]Crawling [bold]{url}[/bold] using profile at [bold]{profile_path}[/bold][/cyan]")

    # Create browser config with the profile
    browser_cfg = BrowserConfig(
        headless=False,  # Set to False to see the browser in action
        use_managed_browser=True,
        user_data_dir=profile_path
    )

    # Default crawler config
    crawler_cfg = CrawlerRunConfig()

    # Ask for output format
    output_format = Prompt.ask(
        "[cyan]Output format[/cyan]",
        choices=["all", "json", "markdown", "md", "title"],
        default="markdown"
    )

    try:
        # Run the crawler
        result = await run_crawler(url, browser_cfg, crawler_cfg, True)

        # Handle output
        if output_format == "all":
            console.print(json.dumps(result.model_dump(), indent=2))
        elif output_format == "json":
            console.print(json.dumps(json.loads(result.extracted_content), indent=2))
        elif output_format in ["markdown", "md"]:
            console.print(result.markdown.raw_markdown)
        elif output_format == "title":
            console.print(result.metadata.get("title", "No title found"))

        console.print(f"[green]Successfully crawled[/green] {url}")
        return result
    except Exception as e:
        console.print(f"[red]Error crawling:[/red] {str(e)}")
        return None

async def use_profile_to_crawl():
    """Interactive profile selection for crawling"""
    profiler = BrowserProfiler()
    profiles = profiler.list_profiles()

    if not profiles:
        console.print("[yellow]No profiles found. Create one first.[/yellow]")
        return

    # Display profiles
    display_profiles_table(profiles)

    # Get profile selection
    idx = Prompt.ask(
        "[cyan]Enter number of profile to use[/cyan]",
        console=console,
        choices=[str(i+1) for i in range(len(profiles))],
        show_choices=False
    )

    try:
        idx = int(idx) - 1
        profile = profiles[idx]

        # Get URL
        url = Prompt.ask("[cyan]Enter URL to crawl[/cyan]")
        if url:
            # Crawl with the selected profile
            await crawl_with_profile_cli(profile["path"], url)
        else:
            console.print("[red]No URL provided[/red]")
    except (ValueError, IndexError):
        console.print("[red]Invalid selection[/red]")

async def manage_profiles():
    """Interactive profile management menu"""
    profiler = BrowserProfiler()

    options = {
        "1": "List profiles",
        "2": "Create new profile",
        "3": "Delete profile",
        "4": "Use a profile to crawl a website",
        "5": "Exit",
    }

    while True:
        console.print(Panel("[bold cyan]Browser Profile Manager[/bold cyan]", border_style="cyan"))

        for key, value in options.items():
            color = "green" if key == "1" else "yellow" if key == "2" else "red" if key == "3" else "blue" if key == "4" else "cyan"
            console.print(f"[{color}]{key}[/{color}]. {value}")

        choice = Prompt.ask("Enter choice", choices=list(options.keys()), default="1")

        if choice == "1":
            # List profiles
            profiles = profiler.list_profiles()
            display_profiles_table(profiles)

        elif choice == "2":
            # Create profile
            await create_profile_interactive(profiler)

        elif choice == "3":
            # Delete profile
            delete_profile_interactive(profiler)

        elif choice == "4":
            # Use profile to crawl
            await use_profile_to_crawl()

        elif choice == "5":
            # Exit
            console.print("[cyan]Exiting profile manager.[/cyan]")
            break

        # Add a separator between operations
        console.print("\n")

@click.group(context_settings={"help_option_names": ["-h", "--help"]})
def cli():
    """Crawl4AI CLI - Web content extraction and browser profile management tool"""
    pass

@cli.command("cdp")
@click.option("--user-data-dir", "-d", help="Directory to use for browser data (will be created if it doesn't exist)")
@click.option("--port", "-P", type=int, default=9222, help="Debugging port (default: 9222)")
@click.option("--browser-type", "-b", type=click.Choice(["chromium", "firefox"]), default="chromium",
              help="Browser type (default: chromium)")
@click.option("--headless", is_flag=True, help="Run browser in headless mode")
@click.option("--incognito", is_flag=True, help="Run in incognito/private mode (ignores user-data-dir)")
def cdp_cmd(user_data_dir: Optional[str], port: int, browser_type: str, headless: bool, incognito: bool):
    """Launch a standalone browser with CDP debugging enabled

    This command launches a browser with Chrome DevTools Protocol (CDP) debugging enabled,
    prints the CDP URL, and keeps the browser running until you press 'q'.

    The CDP URL can be used for various automation and debugging tasks.

    Examples:
        # Launch Chromium with CDP on default port 9222
        crwl cdp

        # Use a specific directory for browser data and custom port
        crwl cdp --user-data-dir ~/browser-data --port 9223

        # Launch in headless mode
        crwl cdp --headless

        # Launch in incognito mode (ignores user-data-dir)
        crwl cdp --incognito
    """
    profiler = BrowserProfiler()

    try:
        # Handle data directory
        data_dir = None
        if not incognito and user_data_dir:
            # Expand user path (~/something)
            expanded_path = os.path.expanduser(user_data_dir)

            # Create directory if it doesn't exist
            if not os.path.exists(expanded_path):
                console.print(f"[yellow]Directory '{expanded_path}' doesn't exist. Creating it.[/yellow]")
                os.makedirs(expanded_path, exist_ok=True)

            data_dir = expanded_path

        # Print launch info
        console.print(Panel(
            f"[cyan]Launching browser with CDP debugging[/cyan]\n\n"
            f"Browser type: [green]{browser_type}[/green]\n"
            f"Debugging port: [yellow]{port}[/yellow]\n"
            f"User data directory: [cyan]{data_dir or 'Temporary directory'}[/cyan]\n"
            f"Headless: [cyan]{'Yes' if headless else 'No'}[/cyan]\n"
            f"Incognito: [cyan]{'Yes' if incognito else 'No'}[/cyan]\n\n"
            f"[yellow]Press 'q' to quit when done[/yellow]",
            title="CDP Browser",
            border_style="cyan"
        ))

        # Run the browser
        cdp_url = anyio.run(
            profiler.launch_standalone_browser,
            browser_type,
            data_dir,
            port,
            headless
        )

        if not cdp_url:
            console.print("[red]Failed to launch browser or get CDP URL[/red]")
            sys.exit(1)

    except Exception as e:
        console.print(f"[red]Error launching CDP browser: {str(e)}[/red]")
        sys.exit(1)

@cli.command("crawl")
@click.argument("url", required=True)
@click.option("--browser-config", "-B", type=click.Path(exists=True), help="Browser config file (YAML/JSON)")
@click.option("--crawler-config", "-C", type=click.Path(exists=True), help="Crawler config file (YAML/JSON)")
@click.option("--filter-config", "-f", type=click.Path(exists=True), help="Content filter config file")
@@ -291,26 +660,44 @@ For more documentation visit: https://github.com/unclecode/crawl4ai
@click.option("--browser", "-b", type=str, callback=parse_key_values, help="Browser parameters as key1=value1,key2=value2")
@click.option("--crawler", "-c", type=str, callback=parse_key_values, help="Crawler parameters as key1=value1,key2=value2")
@click.option("--output", "-o", type=click.Choice(["all", "json", "markdown", "md", "markdown-fit", "md-fit"]), default="all")
@click.option("--bypass-cache", is_flag=True, default=True, help="Bypass cache when crawling")
@click.option("--question", "-q", help="Ask a question about the crawled content")
@click.option("--verbose", "-v", is_flag=True)
@click.option("--profile", "-p", help="Use a specific browser profile (by name)")
def crawl_cmd(url: str, browser_config: str, crawler_config: str, filter_config: str,
              extraction_config: str, schema: str, browser: Dict, crawler: Dict,
              output: str, bypass_cache: bool, question: str, verbose: bool, profile: str):
    """Crawl a website and extract content

    Simple Usage:
        crwl crawl https://example.com
    """
    # Handle profile option
    if profile:
        profiler = BrowserProfiler()
        profile_path = profiler.get_profile_path(profile)

        if not profile_path:
            profiles = profiler.list_profiles()

            if profiles:
                console.print(f"[red]Profile '{profile}' not found. Available profiles:[/red]")
                display_profiles_table(profiles)
            else:
                console.print("[red]No profiles found. Create one with 'crwl profiles'[/red]")

            return

        # Include the profile in browser config
        if not browser:
            browser = {}
        browser["user_data_dir"] = profile_path
        browser["use_managed_browser"] = True

        if verbose:
            console.print(f"[green]Using browser profile:[/green] {profile}")

    try:
        # Load base configurations
        browser_cfg = BrowserConfig.load(load_config_file(browser_config))
@@ -353,7 +740,7 @@ def cli(url: str, example: bool, browser_config: str, crawler_config: str, filte
            raise click.ClickException("LLM provider and API token are required for LLM extraction")

        crawler_cfg.extraction_strategy = LLMExtractionStrategy(
            llm_config=LLMConfig(provider=extract_conf["provider"], api_token=extract_conf["api_token"]),
            instruction=extract_conf["instruction"],
            schema=schema_data,
            **extract_conf.get("params", {})
@@ -401,5 +788,89 @@ def cli(url: str, example: bool, browser_config: str, crawler_config: str, filte
    except Exception as e:
        raise click.ClickException(str(e))


@cli.command("examples")
def examples_cmd():
    """Show usage examples"""
    show_examples()


@cli.command("profiles")
def profiles_cmd():
    """Manage browser profiles interactively

    Launch an interactive browser profile manager where you can:
    - List all existing profiles
    - Create new profiles for authenticated browsing
    - Delete unused profiles
    """
    # Run interactive profile manager
    anyio.run(manage_profiles)


@cli.command(name="")
@click.argument("url", required=False)
@click.option("--example", is_flag=True, help="Show usage examples")
@click.option("--browser-config", "-B", type=click.Path(exists=True), help="Browser config file (YAML/JSON)")
@click.option("--crawler-config", "-C", type=click.Path(exists=True), help="Crawler config file (YAML/JSON)")
@click.option("--filter-config", "-f", type=click.Path(exists=True), help="Content filter config file")
@click.option("--extraction-config", "-e", type=click.Path(exists=True), help="Extraction strategy config file")
@click.option("--schema", "-s", type=click.Path(exists=True), help="JSON schema for extraction")
@click.option("--browser", "-b", type=str, callback=parse_key_values, help="Browser parameters as key1=value1,key2=value2")
@click.option("--crawler", "-c", type=str, callback=parse_key_values, help="Crawler parameters as key1=value1,key2=value2")
@click.option("--output", "-o", type=click.Choice(["all", "json", "markdown", "md", "markdown-fit", "md-fit"]), default="all")
@click.option("--bypass-cache", is_flag=True, default=True, help="Bypass cache when crawling")
@click.option("--question", "-q", help="Ask a question about the crawled content")
@click.option("--verbose", "-v", is_flag=True)
@click.option("--profile", "-p", help="Use a specific browser profile (by name)")
def default(url: str, example: bool, browser_config: str, crawler_config: str, filter_config: str,
            extraction_config: str, schema: str, browser: Dict, crawler: Dict,
            output: str, bypass_cache: bool, question: str, verbose: bool, profile: str):
    """Crawl4AI CLI - Web content extraction tool

    Simple Usage:
        crwl https://example.com

    Run with --example to see detailed usage examples.

    Other commands:
        crwl profiles - Manage browser profiles for identity-based crawling
        crwl crawl    - Crawl a website with advanced options
        crwl cdp      - Launch browser with CDP debugging enabled
        crwl examples - Show more usage examples
    """
    if example:
        show_examples()
        return

    if not url:
        # Show help without error message
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        return

    # Forward to crawl command
    ctx = click.get_current_context()
    ctx.invoke(
        crawl_cmd,
        url=url,
        browser_config=browser_config,
        crawler_config=crawler_config,
        filter_config=filter_config,
        extraction_config=extraction_config,
        schema=schema,
        browser=browser,
        crawler=crawler,
        output=output,
        bypass_cache=bypass_cache,
        question=question,
        verbose=verbose,
        profile=profile
    )


def main():
    import sys
    if len(sys.argv) < 2 or sys.argv[1] not in cli.commands:
        sys.argv.insert(1, "crawl")
    cli()


if __name__ == "__main__":
    main()
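`main()` above defaults to the `crawl` subcommand when the first argument is not a known command, which is what lets `crwl https://example.com` work without spelling out `crwl crawl`. The dispatch rule can be sketched in isolation (a pure function with a hypothetical command set, not the real click registry):

```python
def normalize_argv(argv, commands):
    """If the first CLI arg is not a known subcommand, treat it as a URL
    for an implicit 'crawl' subcommand (mirrors main() above)."""
    if len(argv) < 2 or argv[1] not in commands:
        return argv[:1] + ["crawl"] + argv[1:]
    return argv

cmds = {"crawl", "profiles", "cdp", "examples"}
print(normalize_argv(["crwl", "https://example.com"], cmds))  # ['crwl', 'crawl', 'https://example.com']
print(normalize_argv(["crwl", "profiles"], cmds))             # ['crwl', 'profiles'] (unchanged)
```

Mutating `sys.argv` before calling `cli()` (as `main()` does) achieves the same effect without touching click's own parsing.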
@@ -1,2 +0,0 @@
from .proxy_config import ProxyConfig
__all__ = ["ProxyConfig"]
@@ -1,113 +0,0 @@
import os
from typing import Dict, List, Optional


class ProxyConfig:
    def __init__(
        self,
        server: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        ip: Optional[str] = None,
    ):
        """Configuration class for a single proxy.

        Args:
            server: Proxy server URL (e.g., "http://127.0.0.1:8080")
            username: Optional username for proxy authentication
            password: Optional password for proxy authentication
            ip: Optional IP address for verification purposes
        """
        self.server = server
        self.username = username
        self.password = password

        # Extract IP from server if not explicitly provided
        self.ip = ip or self._extract_ip_from_server()

    def _extract_ip_from_server(self) -> Optional[str]:
        """Extract IP address from server URL."""
        try:
            # Simple extraction assuming http://ip:port format
            if "://" in self.server:
                parts = self.server.split("://")[1].split(":")
                return parts[0]
            else:
                parts = self.server.split(":")
                return parts[0]
        except Exception:
            return None

    @staticmethod
    def from_string(proxy_str: str) -> "ProxyConfig":
        """Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
        parts = proxy_str.split(":")
        if len(parts) == 4:  # ip:port:username:password
            ip, port, username, password = parts
            return ProxyConfig(
                server=f"http://{ip}:{port}",
                username=username,
                password=password,
                ip=ip
            )
        elif len(parts) == 2:  # ip:port only
            ip, port = parts
            return ProxyConfig(
                server=f"http://{ip}:{port}",
                ip=ip
            )
        else:
            raise ValueError(f"Invalid proxy string format: {proxy_str}")

    @staticmethod
    def from_dict(proxy_dict: Dict) -> "ProxyConfig":
        """Create a ProxyConfig from a dictionary."""
        return ProxyConfig(
            server=proxy_dict.get("server"),
            username=proxy_dict.get("username"),
            password=proxy_dict.get("password"),
            ip=proxy_dict.get("ip")
        )

    @staticmethod
    def from_env(env_var: str = "PROXIES") -> List["ProxyConfig"]:
        """Load proxies from environment variable.

        Args:
            env_var: Name of environment variable containing comma-separated proxy strings

        Returns:
            List of ProxyConfig objects
        """
        proxies = []
        try:
            proxy_list = os.getenv(env_var, "").split(",")
            for proxy in proxy_list:
                if not proxy:
                    continue
                proxies.append(ProxyConfig.from_string(proxy))
        except Exception as e:
            print(f"Error loading proxies from environment: {e}")
        return proxies

    def to_dict(self) -> Dict:
        """Convert to dictionary representation."""
        return {
            "server": self.server,
            "username": self.username,
            "password": self.password,
            "ip": self.ip
        }

    def clone(self, **kwargs) -> "ProxyConfig":
        """Create a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            ProxyConfig: A new instance with the specified updates
        """
        config_dict = self.to_dict()
        config_dict.update(kwargs)
        return ProxyConfig.from_dict(config_dict)
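The `'ip:port:username:password'` convention parsed by `from_string` in the removed file can be exercised with a minimal stub (a sketch of just the parsing path, not the full class; the sample proxy values are hypothetical):

```python
from typing import Optional

class ProxyConfig:
    """Minimal stub mirroring the parsing logic of the removed class."""
    def __init__(self, server: str, username: Optional[str] = None,
                 password: Optional[str] = None, ip: Optional[str] = None):
        self.server = server
        self.username = username
        self.password = password
        # Fall back to the host part of the server URL when ip is not given
        self.ip = ip or server.split("://")[-1].split(":")[0]

    @staticmethod
    def from_string(proxy_str: str) -> "ProxyConfig":
        parts = proxy_str.split(":")
        if len(parts) == 4:    # ip:port:username:password
            ip, port, username, password = parts
            return ProxyConfig(f"http://{ip}:{port}", username, password, ip)
        elif len(parts) == 2:  # ip:port only
            ip, port = parts
            return ProxyConfig(f"http://{ip}:{port}", ip=ip)
        raise ValueError(f"Invalid proxy string format: {proxy_str}")

proxy = ProxyConfig.from_string("192.168.1.10:8080:alice:s3cret")
print(proxy.server, proxy.ip)  # http://192.168.1.10:8080 192.168.1.10
```

Keeping the bare `ip` alongside the assembled `server` URL is what allows later verification that traffic actually egresses through the expected address.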
@@ -16,13 +16,13 @@ from .utils import (
    extract_xml_data,
    merge_chunks,
)
from .types import LLMConfig
from .config import DEFAULT_PROVIDER, OVERLAP_RATE, WORD_TOKEN_RATE
from abc import ABC, abstractmethod
import math
from snowballstemmer import stemmer
from .models import TokenUsage
from .prompts import PROMPT_FILTER_CONTENT
import json
import hashlib
from pathlib import Path
@@ -770,37 +770,56 @@ class PruningContentFilter(RelevantContentFilter):


class LLMContentFilter(RelevantContentFilter):
    """Content filtering using LLMs to generate relevant markdown.

    How it works:
    1. Extracts page metadata with fallbacks.
    2. Extracts text chunks from the body element.
    3. Applies LLMs to generate markdown for each chunk.
    4. Filters out chunks below the threshold.
    5. Sorts chunks by score in descending order.
    6. Returns the top N chunks.

    Attributes:
        llm_config (LLMConfig): LLM configuration object.
        instruction (str): Instruction for LLM markdown generation.
        chunk_token_threshold (int): Chunk token threshold for splitting (default: 1e9).
        overlap_rate (float): Overlap rate for chunking (default: 0.5).
        word_token_rate (float): Word token rate for chunking (default: 0.2).
        verbose (bool): Enable verbose logging (default: False).
        logger (AsyncLogger): Custom logger for LLM operations (optional).
    """
    _UNWANTED_PROPS = {
        'provider': 'Instead, use llm_config=LLMConfig(provider="...")',
        'api_token': 'Instead, use llm_config=LLMConfig(api_token="...")',
        'base_url': 'Instead, use llm_config=LLMConfig(base_url="...")',
        'api_base': 'Instead, use llm_config=LLMConfig(base_url="...")',
    }

    def __init__(
        self,
        llm_config: "LLMConfig" = None,
        instruction: str = None,
        chunk_token_threshold: int = int(1e9),
        overlap_rate: float = OVERLAP_RATE,
        word_token_rate: float = WORD_TOKEN_RATE,
        # char_token_rate: float = WORD_TOKEN_RATE * 5,
        # chunk_mode: str = "char",
        verbose: bool = False,
        logger: Optional[AsyncLogger] = None,
        ignore_cache: bool = True,
        # Deprecated properties
        provider: str = DEFAULT_PROVIDER,
        api_token: Optional[str] = None,
        base_url: Optional[str] = None,
        api_base: Optional[str] = None,
        extra_args: Dict = None,
    ):
        super().__init__(None)
        self.provider = provider
        self.api_token = api_token
        self.base_url = base_url or api_base
        self.llm_config = llm_config
        self.instruction = instruction
        self.chunk_token_threshold = chunk_token_threshold
        self.overlap_rate = overlap_rate
@@ -872,7 +891,7 @@ class LLMContentFilter(RelevantContentFilter):
            self.logger.info(
                "Starting LLM markdown content filtering process",
                tag="LLM",
                params={"provider": self.llm_config.provider},
                colors={"provider": Fore.CYAN},
            )
@@ -959,10 +978,10 @@ class LLMContentFilter(RelevantContentFilter):

            future = executor.submit(
                _proceed_with_chunk,
                self.llm_config.provider,
                prompt,
                self.llm_config.api_token,
                self.llm_config.base_url,
                self.extra_args,
            )
            futures.append((i, future))
@@ -155,6 +155,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
                for aud in raw_result.get("media", {}).get("audios", [])
                if aud
            ],
            tables=raw_result.get("media", {}).get("tables", [])
        )

        # Convert links
@@ -193,6 +194,153 @@ class WebScrapingStrategy(ContentScrapingStrategy):
         """
         return await asyncio.to_thread(self._scrap, url, html, **kwargs)

+    def is_data_table(self, table: Tag, **kwargs) -> bool:
+        """
+        Determine if a table element is a data table (not a layout table).
+
+        Args:
+            table (Tag): BeautifulSoup Tag representing a table element
+            **kwargs: Additional keyword arguments including table_score_threshold
+
+        Returns:
+            bool: True if the table is a data table, False otherwise
+        """
+        score = 0
+
+        # Check for thead and tbody
+        has_thead = len(table.select('thead')) > 0
+        has_tbody = len(table.select('tbody')) > 0
+        if has_thead:
+            score += 2
+        if has_tbody:
+            score += 1
+
+        # Check for th elements
+        th_count = len(table.select('th'))
+        if th_count > 0:
+            score += 2
+            if has_thead or len(table.select('tr:first-child th')) > 0:
+                score += 1
+
+        # Check for nested tables
+        if len(table.select('table')) > 0:
+            score -= 3
+
+        # Role attribute check
+        role = table.get('role', '').lower()
+        if role in {'presentation', 'none'}:
+            score -= 3
+
+        # Column consistency
+        rows = table.select('tr')
+        if not rows:
+            return False
+
+        col_counts = [len(row.select('td, th')) for row in rows]
+        avg_cols = sum(col_counts) / len(col_counts)
+        variance = sum((c - avg_cols)**2 for c in col_counts) / len(col_counts)
+        if variance < 1:
+            score += 2
+
+        # Caption and summary
+        if table.select('caption'):
+            score += 2
+        if table.has_attr('summary') and table['summary']:
+            score += 1
+
+        # Text density
+        total_text = sum(len(cell.get_text().strip()) for row in rows for cell in row.select('td, th'))
+        total_tags = sum(1 for _ in table.descendants if isinstance(_, Tag))
+        text_ratio = total_text / (total_tags + 1e-5)
+        if text_ratio > 20:
+            score += 3
+        elif text_ratio > 10:
+            score += 2
+
+        # Data attributes
+        data_attrs = sum(1 for attr in table.attrs if attr.startswith('data-'))
+        score += data_attrs * 0.5
+
+        # Size check
+        if avg_cols >= 2 and len(rows) >= 2:
+            score += 2
+
+        threshold = kwargs.get('table_score_threshold', 7)
+        return score >= threshold
+
+    def extract_table_data(self, table: Tag) -> dict:
+        """
+        Extract structured data from a table element.
+
+        Args:
+            table (Tag): BeautifulSoup Tag representing a table element
+
+        Returns:
+            dict: Dictionary containing table data (headers, rows, caption, summary)
+        """
+        caption_elem = table.select_one('caption')
+        caption = caption_elem.get_text().strip() if caption_elem else ""
+        summary = table.get('summary', '').strip()
+
+        # Extract headers with colspan handling
+        headers = []
+        thead_rows = table.select('thead tr')
+        if thead_rows:
+            header_cells = thead_rows[0].select('th')
+            for cell in header_cells:
+                text = cell.get_text().strip()
+                colspan = int(cell.get('colspan', 1))
+                headers.extend([text] * colspan)
+        else:
+            first_row = table.select('tr:first-child')
+            if first_row:
+                for cell in first_row[0].select('th, td'):
+                    text = cell.get_text().strip()
+                    colspan = int(cell.get('colspan', 1))
+                    headers.extend([text] * colspan)
+
+        # Extract rows with colspan handling
+        rows = []
+        all_rows = table.select('tr')
+        thead = table.select_one('thead')
+        tbody_rows = []
+
+        if thead:
+            thead_rows = thead.select('tr')
+            tbody_rows = [row for row in all_rows if row not in thead_rows]
+        else:
+            if all_rows and all_rows[0].select('th'):
+                tbody_rows = all_rows[1:]
+            else:
+                tbody_rows = all_rows
+
+        for row in tbody_rows:
+            # for row in table.select('tr:not(:has(ancestor::thead))'):
+            row_data = []
+            for cell in row.select('td'):
+                text = cell.get_text().strip()
+                colspan = int(cell.get('colspan', 1))
+                row_data.extend([text] * colspan)
+            if row_data:
+                rows.append(row_data)
+
+        # Align rows with headers
+        max_columns = len(headers) if headers else (max(len(row) for row in rows) if rows else 0)
+        aligned_rows = []
+        for row in rows:
+            aligned = row[:max_columns] + [''] * (max_columns - len(row))
+            aligned_rows.append(aligned)
+
+        if not headers:
+            headers = [f"Column {i+1}" for i in range(max_columns)]
+
+        return {
+            "headers": headers,
+            "rows": aligned_rows,
+            "caption": caption,
+            "summary": summary,
+        }
+
     def flatten_nested_elements(self, node):
         """
         Flatten nested elements in a HTML tree.
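The `is_data_table` method added above boils down to a weighted sum of structural signals (header sections, column consistency, text density, nesting) compared against a threshold, 7 by default. A stripped-down sketch of the same idea over precomputed counts — the function and parameter names here are illustrative only, not the library's API:

```python
# Minimal sketch of the data-table heuristic: weighted structural
# signals summed against a threshold. Inputs are precomputed counts
# rather than a parsed tree; names are illustrative only.
def table_score(has_thead, has_tbody, th_count, nested_tables,
                col_variance, has_caption, text_ratio, n_rows, avg_cols):
    score = 0
    score += 2 if has_thead else 0
    score += 1 if has_tbody else 0
    score += 2 if th_count > 0 else 0
    score -= 3 * nested_tables             # nested tables suggest layout use
    score += 2 if col_variance < 1 else 0  # consistent column counts per row
    score += 2 if has_caption else 0
    if text_ratio > 20:                    # text-dense tables look like data
        score += 3
    elif text_ratio > 10:
        score += 2
    if avg_cols >= 2 and n_rows >= 2:      # minimum useful size
        score += 2
    return score

# A well-formed data table easily clears the default threshold of 7:
s = table_score(True, True, 3, 0, 0.0, True, 25, 5, 3)
```

A single-cell layout table with nested tables and erratic columns scores negative under the same weights, which is exactly why the threshold separates the two cases.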
@@ -431,7 +579,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
         Returns:
             dict: A dictionary containing the processed element information.
         """
-        media = {"images": [], "videos": [], "audios": []}
+        media = {"images": [], "videos": [], "audios": [], "tables": []}
         internal_links_dict = {}
         external_links_dict = {}
         self._process_element(
@@ -691,6 +839,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
         html: str,
         word_count_threshold: int = MIN_WORD_THRESHOLD,
         css_selector: str = None,
+        target_elements: List[str] = None,
         **kwargs,
     ) -> Dict[str, Any]:
         """
@@ -745,22 +894,37 @@ class WebScrapingStrategy(ContentScrapingStrategy):
             for element in body.select(excluded_selector):
                 element.extract()

-        if css_selector:
-            selected_elements = body.select(css_selector)
-            if not selected_elements:
-                return {
-                    "markdown": "",
-                    "cleaned_html": "",
-                    "success": True,
-                    "media": {"images": [], "videos": [], "audios": []},
-                    "links": {"internal": [], "external": []},
-                    "metadata": {},
-                    "message": f"No elements found for CSS selector: {css_selector}",
-                }
-                # raise InvalidCSSSelectorError(f"Invalid CSS selector, No elements found for CSS selector: {css_selector}")
-            body = soup.new_tag("div")
-            for el in selected_elements:
-                body.append(el)
+        # if False and css_selector:
+        #     selected_elements = body.select(css_selector)
+        #     if not selected_elements:
+        #         return {
+        #             "markdown": "",
+        #             "cleaned_html": "",
+        #             "success": True,
+        #             "media": {"images": [], "videos": [], "audios": []},
+        #             "links": {"internal": [], "external": []},
+        #             "metadata": {},
+        #             "message": f"No elements found for CSS selector: {css_selector}",
+        #         }
+        #         # raise InvalidCSSSelectorError(f"Invalid CSS selector, No elements found for CSS selector: {css_selector}")
+        #     body = soup.new_tag("div")
+        #     for el in selected_elements:
+        #         body.append(el)

+        content_element = None
+        if target_elements:
+            try:
+                for_content_targeted_element = []
+                for target_element in target_elements:
+                    for_content_targeted_element.extend(body.select(target_element))
+                content_element = soup.new_tag("div")
+                for el in for_content_targeted_element:
+                    content_element.append(el)
+            except Exception as e:
+                self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
+                return None
+        else:
+            content_element = body

         kwargs["exclude_social_media_domains"] = set(
             kwargs.get("exclude_social_media_domains", []) + SOCIAL_MEDIA_DOMAINS
@@ -800,6 +964,15 @@ class WebScrapingStrategy(ContentScrapingStrategy):
             if result is not None
             for img in result
         ]

+        # Process tables if not excluded
+        excluded_tags = set(kwargs.get("excluded_tags", []) or [])
+        if 'table' not in excluded_tags:
+            tables = body.find_all('table')
+            for table in tables:
+                if self.is_data_table(table, **kwargs):
+                    table_data = self.extract_table_data(table)
+                    media["tables"].append(table_data)
+
         body = self.flatten_nested_elements(body)
         base64_pattern = re.compile(r'data:image/[^;]+;base64,([^"]+)')
@@ -811,7 +984,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):

         str_body = ""
         try:
-            str_body = body.encode_contents().decode("utf-8")
+            str_body = content_element.encode_contents().decode("utf-8")
         except Exception:
             # Reset body to the original HTML
             success = False
@@ -850,7 +1023,6 @@ class WebScrapingStrategy(ContentScrapingStrategy):
         cleaned_html = str_body.replace("\n\n", "\n").replace("  ", " ")

         return {
-            # **markdown_content,
             "cleaned_html": cleaned_html,
             "success": success,
             "media": media,
@@ -1193,12 +1365,125 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):

         return root

+    def is_data_table(self, table: etree.Element, **kwargs) -> bool:
+        score = 0
+        # Check for thead and tbody
+        has_thead = len(table.xpath(".//thead")) > 0
+        has_tbody = len(table.xpath(".//tbody")) > 0
+        if has_thead:
+            score += 2
+        if has_tbody:
+            score += 1
+
+        # Check for th elements
+        th_count = len(table.xpath(".//th"))
+        if th_count > 0:
+            score += 2
+            if has_thead or table.xpath(".//tr[1]/th"):
+                score += 1
+
+        # Check for nested tables
+        if len(table.xpath(".//table")) > 0:
+            score -= 3
+
+        # Role attribute check
+        role = table.get("role", "").lower()
+        if role in {"presentation", "none"}:
+            score -= 3
+
+        # Column consistency
+        rows = table.xpath(".//tr")
+        if not rows:
+            return False
+        col_counts = [len(row.xpath(".//td|.//th")) for row in rows]
+        avg_cols = sum(col_counts) / len(col_counts)
+        variance = sum((c - avg_cols)**2 for c in col_counts) / len(col_counts)
+        if variance < 1:
+            score += 2
+
+        # Caption and summary
+        if table.xpath(".//caption"):
+            score += 2
+        if table.get("summary"):
+            score += 1
+
+        # Text density
+        total_text = sum(len(''.join(cell.itertext()).strip()) for row in rows for cell in row.xpath(".//td|.//th"))
+        total_tags = sum(1 for _ in table.iterdescendants())
+        text_ratio = total_text / (total_tags + 1e-5)
+        if text_ratio > 20:
+            score += 3
+        elif text_ratio > 10:
+            score += 2
+
+        # Data attributes
+        data_attrs = sum(1 for attr in table.attrib if attr.startswith('data-'))
+        score += data_attrs * 0.5
+
+        # Size check
+        if avg_cols >= 2 and len(rows) >= 2:
+            score += 2
+
+        threshold = kwargs.get("table_score_threshold", 7)
+        return score >= threshold
+
+    def extract_table_data(self, table: etree.Element) -> dict:
+        caption = table.xpath(".//caption/text()")
+        caption = caption[0].strip() if caption else ""
+        summary = table.get("summary", "").strip()
+
+        # Extract headers with colspan handling
+        headers = []
+        thead_rows = table.xpath(".//thead/tr")
+        if thead_rows:
+            header_cells = thead_rows[0].xpath(".//th")
+            for cell in header_cells:
+                text = cell.text_content().strip()
+                colspan = int(cell.get("colspan", 1))
+                headers.extend([text] * colspan)
+        else:
+            first_row = table.xpath(".//tr[1]")
+            if first_row:
+                for cell in first_row[0].xpath(".//th|.//td"):
+                    text = cell.text_content().strip()
+                    colspan = int(cell.get("colspan", 1))
+                    headers.extend([text] * colspan)
+
+        # Extract rows with colspan handling
+        rows = []
+        for row in table.xpath(".//tr[not(ancestor::thead)]"):
+            row_data = []
+            for cell in row.xpath(".//td"):
+                text = cell.text_content().strip()
+                colspan = int(cell.get("colspan", 1))
+                row_data.extend([text] * colspan)
+            if row_data:
+                rows.append(row_data)
+
+        # Align rows with headers
+        max_columns = len(headers) if headers else (max(len(row) for row in rows) if rows else 0)
+        aligned_rows = []
+        for row in rows:
+            aligned = row[:max_columns] + [''] * (max_columns - len(row))
+            aligned_rows.append(aligned)
+
+        if not headers:
+            headers = [f"Column {i+1}" for i in range(max_columns)]
+
+        return {
+            "headers": headers,
+            "rows": aligned_rows,
+            "caption": caption,
+            "summary": summary,
+        }
+
     def _scrap(
         self,
         url: str,
         html: str,
         word_count_threshold: int = MIN_WORD_THRESHOLD,
         css_selector: str = None,
+        target_elements: List[str] = None,
         **kwargs,
     ) -> Dict[str, Any]:
         if not html:
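Both `extract_table_data` implementations expand `colspan` cells by repeating the cell text, so each logical column receives a value, then truncate or pad every row to the header width. Those two steps in isolation — a sketch where cells are represented as `(text, colspan)` tuples purely for illustration:

```python
# Sketch of the colspan expansion and row alignment used by both
# extract_table_data implementations in the diff above.
def expand_row(cells):
    # cells: list of (text, colspan) tuples.
    out = []
    for text, colspan in cells:
        out.extend([text] * colspan)   # repeat text across spanned columns
    return out

def align_rows(rows, n_columns):
    # Truncate long rows; pad short ones with empty strings.
    return [row[:n_columns] + [''] * (n_columns - len(row)) for row in rows]

headers = expand_row([("Name", 1), ("Score", 2)])   # ['Name', 'Score', 'Score']
rows = align_rows([expand_row([("Ada", 1), ("9", 1)])], len(headers))
```

The padding guarantees that `headers` and every entry of `rows` have equal length, which is what makes the returned `{"headers": ..., "rows": ...}` dict safe to feed into tabular consumers.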
@@ -1249,24 +1534,38 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
         meta = {}

         # Handle CSS selector targeting
-        if css_selector:
-            try:
-                selected_elements = body.cssselect(css_selector)
-                if not selected_elements:
-                    return {
-                        "markdown": "",
-                        "cleaned_html": "",
-                        "success": True,
-                        "media": {"images": [], "videos": [], "audios": []},
-                        "links": {"internal": [], "external": []},
-                        "metadata": meta,
-                        "message": f"No elements found for CSS selector: {css_selector}",
-                    }
-                body = lhtml.Element("div")
-                body.extend(selected_elements)
-            except Exception as e:
-                self._log("error", f"Error with CSS selector: {str(e)}", "SCRAPE")
-                return None
+        # if css_selector:
+        #     try:
+        #         selected_elements = body.cssselect(css_selector)
+        #         if not selected_elements:
+        #             return {
+        #                 "markdown": "",
+        #                 "cleaned_html": "",
+        #                 "success": True,
+        #                 "media": {"images": [], "videos": [], "audios": []},
+        #                 "links": {"internal": [], "external": []},
+        #                 "metadata": meta,
+        #                 "message": f"No elements found for CSS selector: {css_selector}",
+        #             }
+        #         body = lhtml.Element("div")
+        #         body.extend(selected_elements)
+        #     except Exception as e:
+        #         self._log("error", f"Error with CSS selector: {str(e)}", "SCRAPE")
+        #         return None

+        content_element = None
+        if target_elements:
+            try:
+                for_content_targeted_element = []
+                for target_element in target_elements:
+                    for_content_targeted_element.extend(body.cssselect(target_element))
+                content_element = lhtml.Element("div")
+                content_element.extend(for_content_targeted_element)
+            except Exception as e:
+                self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
+                return None
+        else:
+            content_element = body

         # Remove script and style tags
         for tag in ["script", "style", "link", "meta", "noscript"]:
@@ -1290,7 +1589,7 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
                 form.getparent().remove(form)

         # Process content
-        media = {"images": [], "videos": [], "audios": []}
+        media = {"images": [], "videos": [], "audios": [], "tables": []}
         internal_links_dict = {}
         external_links_dict = {}
@@ -1304,6 +1603,13 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
             **kwargs,
         )

+        if 'table' not in excluded_tags:
+            tables = body.xpath(".//table")
+            for table in tables:
+                if self.is_data_table(table, **kwargs):
+                    table_data = self.extract_table_data(table)
+                    media["tables"].append(table_data)
+
         # Handle only_text option
         if kwargs.get("only_text", False):
             for tag in ONLY_TEXT_ELIGIBLE_TAGS:
@@ -1330,7 +1636,8 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):

         # Generate output HTML
         cleaned_html = lhtml.tostring(
-            body,
+            # body,
+            content_element,
             encoding="unicode",
             pretty_print=True,
             method="html",
@@ -1375,7 +1682,12 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
         return {
             "cleaned_html": cleaned_html,
             "success": False,
-            "media": {"images": [], "videos": [], "audios": []},
+            "media": {
+                "images": [],
+                "videos": [],
+                "audios": [],
+                "tables": []
+            },
             "links": {"internal": [], "external": []},
             "metadata": {},
         }
@@ -16,7 +16,7 @@ class DeepCrawlDecorator:

     def __call__(self, original_arun):
         @wraps(original_arun)
-        async def wrapped_arun(url: str, config: Optional[CrawlerRunConfig] = None, **kwargs):
+        async def wrapped_arun(url: str, config: CrawlerRunConfig = None, **kwargs):
             # If deep crawling is already active, call the original method to avoid recursion.
             if config and config.deep_crawl_strategy and not self.deep_crawl_active.get():
                 token = self.deep_crawl_active.set(True)
@@ -12,6 +12,7 @@ from . import DeepCrawlStrategy

 from ..types import AsyncWebCrawler, CrawlerRunConfig, CrawlResult, RunManyReturn

+from math import inf as infinity

 # Configurable batch size for processing items from the priority queue
 BATCH_SIZE = 10
@@ -37,15 +38,18 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
         filter_chain: FilterChain = FilterChain(),
         url_scorer: Optional[URLScorer] = None,
         include_external: bool = False,
+        max_pages: int = infinity,
         logger: Optional[logging.Logger] = None,
     ):
         self.max_depth = max_depth
         self.filter_chain = filter_chain
         self.url_scorer = url_scorer
         self.include_external = include_external
+        self.max_pages = max_pages
         self.logger = logger or logging.getLogger(__name__)
         self.stats = TraversalStats(start_time=datetime.now())
         self._cancel_event = asyncio.Event()
+        self._pages_crawled = 0

     async def can_process_url(self, url: str, depth: int) -> bool:
         """
@@ -86,12 +90,20 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
         new_depth = current_depth + 1
         if new_depth > self.max_depth:
             return

+        # If we've reached the max pages limit, don't discover new links
+        remaining_capacity = self.max_pages - self._pages_crawled
+        if remaining_capacity <= 0:
+            self.logger.info(f"Max pages limit ({self.max_pages}) reached, stopping link discovery")
+            return
+
         # Retrieve internal links; include external links if enabled.
         links = result.links.get("internal", [])
         if self.include_external:
             links += result.links.get("external", [])

+        # If we have more links than remaining capacity, limit how many we'll process
+        valid_links = []
         for link in links:
             url = link.get("href")
             if url in visited:
@@ -99,8 +111,16 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
             if not await self.can_process_url(url, new_depth):
                 self.stats.urls_skipped += 1
                 continue

-            # Record the new depth.
+            valid_links.append(url)
+
+        # If we have more valid links than capacity, limit them
+        if len(valid_links) > remaining_capacity:
+            valid_links = valid_links[:remaining_capacity]
+            self.logger.info(f"Limiting to {remaining_capacity} URLs due to max_pages limit")
+
+        # Record the new depths and add to next_links
+        for url in valid_links:
             depths[url] = new_depth
             next_links.append((url, source_url))
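The `max_pages` bookkeeping in `link_discovery` reduces to three steps: compute the remaining budget, stop discovering when it hits zero, and truncate the candidate list to fit. The same logic in isolation — a sketch, not the strategy class itself:

```python
def limit_links(candidates, max_pages, pages_crawled):
    # Remaining budget before the max_pages limit is hit.
    remaining = max_pages - pages_crawled
    if remaining <= 0:
        return []                 # limit reached: discover nothing
    return candidates[:remaining]  # keep only what the budget allows

limit_links(["/a", "/b", "/c"], max_pages=5, pages_crawled=4)  # ['/a']
```

Because the counter only increments on successful crawls (as the later hunks show), failed fetches never consume budget, so `max_pages` bounds the number of pages actually retrieved rather than the number attempted.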
@@ -123,6 +143,11 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
         depths: Dict[str, int] = {start_url: 0}

         while not queue.empty() and not self._cancel_event.is_set():
+            # Stop if we've reached the max pages limit
+            if self._pages_crawled >= self.max_pages:
+                self.logger.info(f"Max pages limit ({self.max_pages}) reached, stopping crawl")
+                break
+
             batch: List[Tuple[float, int, str, Optional[str]]] = []
             # Retrieve up to BATCH_SIZE items from the priority queue.
             for _ in range(BATCH_SIZE):
@@ -153,14 +178,23 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                 result.metadata["depth"] = depth
                 result.metadata["parent_url"] = parent_url
                 result.metadata["score"] = score

+                # Count only successful crawls toward max_pages limit
+                if result.success:
+                    self._pages_crawled += 1
+
                 yield result
-                # Discover new links from this result.
-                new_links: List[Tuple[str, Optional[str]]] = []
-                await self.link_discovery(result, result_url, depth, visited, new_links, depths)
-                for new_url, new_parent in new_links:
-                    new_depth = depths.get(new_url, depth + 1)
-                    new_score = self.url_scorer.score(new_url) if self.url_scorer else 0
-                    await queue.put((new_score, new_depth, new_url, new_parent))

+                # Only discover links from successful crawls
+                if result.success:
+                    # Discover new links from this result
+                    new_links: List[Tuple[str, Optional[str]]] = []
+                    await self.link_discovery(result, result_url, depth, visited, new_links, depths)
+
+                    for new_url, new_parent in new_links:
+                        new_depth = depths.get(new_url, depth + 1)
+                        new_score = self.url_scorer.score(new_url) if self.url_scorer else 0
+                        await queue.put((new_score, new_depth, new_url, new_parent))

         # End of crawl.
|||||||
@@ -10,6 +10,8 @@ from .filters import FilterChain
|
|||||||
from .scorers import URLScorer
|
from .scorers import URLScorer
|
||||||
from . import DeepCrawlStrategy
|
from . import DeepCrawlStrategy
|
||||||
from ..types import AsyncWebCrawler, CrawlerRunConfig, CrawlResult
|
from ..types import AsyncWebCrawler, CrawlerRunConfig, CrawlResult
|
||||||
|
from ..utils import normalize_url_for_deep_crawl, efficient_normalize_url_for_deep_crawl
|
||||||
|
from math import inf as infinity
|
||||||
|
|
||||||
class BFSDeepCrawlStrategy(DeepCrawlStrategy):
|
class BFSDeepCrawlStrategy(DeepCrawlStrategy):
|
||||||
"""
|
"""
|
||||||
@@ -24,17 +26,22 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
         self,
         max_depth: int,
         filter_chain: FilterChain = FilterChain(),
         url_scorer: Optional[URLScorer] = None,
         include_external: bool = False,
+        score_threshold: float = -infinity,
+        max_pages: int = infinity,
         logger: Optional[logging.Logger] = None,
     ):
         self.max_depth = max_depth
         self.filter_chain = filter_chain
         self.url_scorer = url_scorer
         self.include_external = include_external
+        self.score_threshold = score_threshold
+        self.max_pages = max_pages
         self.logger = logger or logging.getLogger(__name__)
         self.stats = TraversalStats(start_time=datetime.now())
         self._cancel_event = asyncio.Event()
+        self._pages_crawled = 0

     async def can_process_url(self, url: str, depth: int) -> bool:
         """
@@ -72,28 +79,59 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
         prepares the next level of URLs.
         Each valid URL is appended to next_level as a tuple (url, parent_url)
         and its depth is tracked.
         """
         next_depth = current_depth + 1
         if next_depth > self.max_depth:
             return

+        # If we've reached the max pages limit, don't discover new links
+        remaining_capacity = self.max_pages - self._pages_crawled
+        if remaining_capacity <= 0:
+            self.logger.info(f"Max pages limit ({self.max_pages}) reached, stopping link discovery")
+            return
+
         # Get internal links and, if enabled, external links.
         links = result.links.get("internal", [])
         if self.include_external:
             links += result.links.get("external", [])

+        valid_links = []
+
+        # First collect all valid links
         for link in links:
             url = link.get("href")
-            if url in visited:
+            # Strip URL fragments to avoid duplicate crawling
+            # base_url = url.split('#')[0] if url else url
+            base_url = normalize_url_for_deep_crawl(url, source_url)
+            if base_url in visited:
                 continue
             if not await self.can_process_url(url, next_depth):
                 self.stats.urls_skipped += 1
                 continue

-            # Score the URL if a scorer is provided. In this simple BFS
-            # the score is not used for ordering.
-            score = self.url_scorer.score(url) if self.url_scorer else 0
-            # attach the score to metadata if needed.
+            # Score the URL if a scorer is provided
+            score = self.url_scorer.score(base_url) if self.url_scorer else 0
+
+            # Skip URLs with scores below the threshold
+            if score < self.score_threshold:
+                self.logger.debug(f"URL {url} skipped: score {score} below threshold {self.score_threshold}")
+                self.stats.urls_skipped += 1
+                continue
+
+            valid_links.append((base_url, score))
+
+        # If we have more valid links than capacity, sort by score and take the top ones
+        if len(valid_links) > remaining_capacity:
+            if self.url_scorer:
+                # Sort by score in descending order
+                valid_links.sort(key=lambda x: x[1], reverse=True)
+            # Take only as many as we have capacity for
+            valid_links = valid_links[:remaining_capacity]
+            self.logger.info(f"Limiting to {remaining_capacity} URLs due to max_pages limit")
+
+        # Process the final selected links
+        for url, score in valid_links:
+            # attach the score to metadata if needed
             if score:
                 result.metadata = result.metadata or {}
                 result.metadata["score"] = score
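The BFS variant of `link_discovery` above combines two cuts: first drop any link whose score falls below `score_threshold`, then, if the survivors still exceed the remaining page budget, keep only the highest-scoring ones. A sketch of that selection step in isolation, with the same strict-less-than skip semantics as the diff:

```python
def select_links(scored_links, threshold, capacity):
    # scored_links: list of (url, score) pairs.
    # Keep links at or above the threshold (the real code skips score < threshold).
    kept = [(u, s) for u, s in scored_links if s >= threshold]
    if len(kept) > capacity:
        # Prefer the highest-scoring links when over budget.
        kept.sort(key=lambda x: x[1], reverse=True)
        kept = kept[:capacity]
    return kept

select_links([("/a", 0.9), ("/b", 0.2), ("/c", 0.7)], threshold=0.5, capacity=1)
# keeps only ("/a", 0.9)
```

With the defaults (`score_threshold = -infinity`, `max_pages = infinity`) both cuts are no-ops, so existing crawls behave exactly as before the change.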
@@ -125,7 +163,11 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
             # Clone the config to disable deep crawling recursion and enforce batch mode.
             batch_config = config.clone(deep_crawl_strategy=None, stream=False)
             batch_results = await crawler.arun_many(urls=urls, config=batch_config)
 
+            # Update pages crawled counter - count only successful crawls
+            successful_results = [r for r in batch_results if r.success]
+            self._pages_crawled += len(successful_results)
+
             for result in batch_results:
                 url = result.url
                 depth = depths.get(url, 0)
@@ -134,7 +176,11 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
                 parent_url = next((parent for (u, parent) in current_level if u == url), None)
                 result.metadata["parent_url"] = parent_url
                 results.append(result)
-                await self.link_discovery(result, url, depth, visited, next_level, depths)
+
+                # Only discover links from successful crawls
+                if result.success:
+                    # Link discovery will handle the max pages limit internally
+                    await self.link_discovery(result, url, depth, visited, next_level, depths)
 
             current_level = next_level
@@ -161,6 +207,9 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
 
             stream_config = config.clone(deep_crawl_strategy=None, stream=True)
             stream_gen = await crawler.arun_many(urls=urls, config=stream_config)
+
+            # Keep track of processed results for this batch
+            results_count = 0
             async for result in stream_gen:
                 url = result.url
                 depth = depths.get(url, 0)
@@ -168,9 +217,24 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
                 result.metadata["depth"] = depth
                 parent_url = next((parent for (u, parent) in current_level if u == url), None)
                 result.metadata["parent_url"] = parent_url
+
+                # Count only successful crawls
+                if result.success:
+                    self._pages_crawled += 1
+
+                results_count += 1
                 yield result
-                await self.link_discovery(result, url, depth, visited, next_level, depths)
+
+                # Only discover links from successful crawls
+                if result.success:
+                    # Link discovery will handle the max pages limit internally
+                    await self.link_discovery(result, url, depth, visited, next_level, depths)
+
+            # If we didn't get results back (e.g. due to errors), avoid getting stuck in an infinite loop
+            # by considering these URLs as visited but not counting them toward the max_pages limit
+            if results_count == 0 and urls:
+                self.logger.warning(f"No results returned for {len(urls)} URLs, marking as visited")
 
             current_level = next_level
 
     async def shutdown(self) -> None:
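Across the batch and stream hunks above, the common bookkeeping is: only successful results count toward `max_pages`, and subsequent link discovery works against the remaining budget. A hypothetical sketch of that accounting, outside the strategy classes:

```python
class PageBudget:
    """Track how many pages have been crawled against a max_pages limit."""

    def __init__(self, max_pages):
        self.max_pages = max_pages
        self.pages_crawled = 0

    def record(self, successes):
        # Count only successful crawls toward the limit,
        # mirroring `self._pages_crawled += 1` on result.success
        self.pages_crawled += sum(1 for ok in successes if ok)

    def remaining(self):
        # Capacity handed to link discovery for the next level
        return max(0, self.max_pages - self.pages_crawled)
```

Failed crawls consume no budget, so a level full of errors does not silently exhaust `max_pages`.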
@@ -37,6 +37,7 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
             # Clone config to disable recursive deep crawling.
             batch_config = config.clone(deep_crawl_strategy=None, stream=False)
             url_results = await crawler.arun_many(urls=[url], config=batch_config)
 
             for result in url_results:
                 result.metadata = result.metadata or {}
                 result.metadata["depth"] = depth
@@ -44,13 +45,19 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
                 if self.url_scorer:
                     result.metadata["score"] = self.url_scorer.score(url)
                 results.append(result)
 
-                new_links: List[Tuple[str, Optional[str]]] = []
-                await self.link_discovery(result, url, depth, visited, new_links, depths)
-                # Push new links in reverse order so the first discovered is processed next.
-                for new_url, new_parent in reversed(new_links):
-                    new_depth = depths.get(new_url, depth + 1)
-                    stack.append((new_url, new_parent, new_depth))
+                # Count only successful crawls toward max_pages limit
+                if result.success:
+                    self._pages_crawled += 1
+
+                # Only discover links from successful crawls
+                new_links: List[Tuple[str, Optional[str]]] = []
+                await self.link_discovery(result, url, depth, visited, new_links, depths)
+
+                # Push new links in reverse order so the first discovered is processed next.
+                for new_url, new_parent in reversed(new_links):
+                    new_depth = depths.get(new_url, depth + 1)
+                    stack.append((new_url, new_parent, new_depth))
         return results
 
     async def _arun_stream(
@@ -83,8 +90,13 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
                     result.metadata["score"] = self.url_scorer.score(url)
                 yield result
 
-                new_links: List[Tuple[str, Optional[str]]] = []
-                await self.link_discovery(result, url, depth, visited, new_links, depths)
-                for new_url, new_parent in reversed(new_links):
-                    new_depth = depths.get(new_url, depth + 1)
-                    stack.append((new_url, new_parent, new_depth))
+                # Only count successful crawls toward max_pages limit
+                # and only discover links from successful crawls
+                if result.success:
+                    self._pages_crawled += 1
+
+                new_links: List[Tuple[str, Optional[str]]] = []
+                await self.link_discovery(result, url, depth, visited, new_links, depths)
+                for new_url, new_parent in reversed(new_links):
+                    new_depth = depths.get(new_url, depth + 1)
+                    stack.append((new_url, new_parent, new_depth))
@@ -124,6 +124,7 @@ class URLPatternFilter(URLFilter):
         "_simple_prefixes",
         "_domain_patterns",
         "_path_patterns",
+        "_reverse",
     )
 
     PATTERN_TYPES = {
@@ -138,8 +139,10 @@ class URLPatternFilter(URLFilter):
         self,
         patterns: Union[str, Pattern, List[Union[str, Pattern]]],
         use_glob: bool = True,
+        reverse: bool = False,
     ):
         super().__init__()
+        self._reverse = reverse
         patterns = [patterns] if isinstance(patterns, (str, Pattern)) else patterns
 
         self._simple_suffixes = set()
@@ -205,36 +208,40 @@ class URLPatternFilter(URLFilter):
 
     @lru_cache(maxsize=10000)
     def apply(self, url: str) -> bool:
-        """Hierarchical pattern matching"""
         # Quick suffix check (*.html)
         if self._simple_suffixes:
             path = url.split("?")[0]
             if path.split("/")[-1].split(".")[-1] in self._simple_suffixes:
-                self._update_stats(True)
-                return True
+                result = True
+                self._update_stats(result)
+                return not result if self._reverse else result
 
         # Domain check
         if self._domain_patterns:
             for pattern in self._domain_patterns:
                 if pattern.match(url):
-                    self._update_stats(True)
-                    return True
+                    result = True
+                    self._update_stats(result)
+                    return not result if self._reverse else result
 
         # Prefix check (/foo/*)
         if self._simple_prefixes:
             path = url.split("?")[0]
             if any(path.startswith(p) for p in self._simple_prefixes):
-                self._update_stats(True)
-                return True
+                result = True
+                self._update_stats(result)
+                return not result if self._reverse else result
 
         # Complex patterns
        if self._path_patterns:
             if any(p.search(url) for p in self._path_patterns):
-                self._update_stats(True)
-                return True
+                result = True
+                self._update_stats(result)
+                return not result if self._reverse else result
 
-        self._update_stats(False)
-        return False
+        result = False
+        self._update_stats(result)
+        return not result if self._reverse else result
 
 
 class ContentTypeFilter(URLFilter):
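The new `reverse` flag inverts every return path of `URLPatternFilter.apply`, turning an allow-list of patterns into a block-list. The inversion itself reduces to one expression, shown here as a standalone helper for illustration:

```python
def apply_reverse(matched: bool, reverse: bool) -> bool:
    """Invert a pattern-match verdict when reverse mode is on.

    With reverse=False the filter passes URLs that match its patterns;
    with reverse=True it passes URLs that do NOT match them.
    """
    return not matched if reverse else matched
```

Note that in the diff above, `_update_stats(result)` is always called with the raw match verdict, so the filter's hit statistics track pattern matches regardless of `reverse`.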
@@ -427,6 +434,11 @@ class DomainFilter(URLFilter):
         if isinstance(domains, str):
             return {domains.lower()}
         return {d.lower() for d in domains}
 
+    @staticmethod
+    def _is_subdomain(domain: str, parent_domain: str) -> bool:
+        """Check if domain is a subdomain of parent_domain"""
+        return domain == parent_domain or domain.endswith(f".{parent_domain}")
+
     @staticmethod
     @lru_cache(maxsize=10000)
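The dot in `endswith(f".{parent_domain}")` is what keeps the new `_is_subdomain` check precise: `docs.example.com` is a subdomain of `example.com`, but `badexample.com` is not, even though it ends with the same characters. Restated outside the class for illustration:

```python
def is_subdomain(domain: str, parent_domain: str) -> bool:
    # Exact match, or a label boundary (".") immediately before the parent.
    # Without the leading dot, "badexample.com" would wrongly match "example.com".
    return domain == parent_domain or domain.endswith(f".{parent_domain}")
```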
@@ -444,20 +456,26 @@ class DomainFilter(URLFilter):
 
         domain = self._extract_domain(url)
 
-        # Early return for blocked domains
-        if domain in self._blocked_domains:
-            self._update_stats(False)
-            return False
+        # Check for blocked domains, including subdomains
+        for blocked in self._blocked_domains:
+            if self._is_subdomain(domain, blocked):
+                self._update_stats(False)
+                return False
 
         # If no allowed domains specified, accept all non-blocked
         if self._allowed_domains is None:
             self._update_stats(True)
             return True
 
-        # Final allowed domains check
-        result = domain in self._allowed_domains
-        self._update_stats(result)
-        return result
+        # Check if domain matches any allowed domain (including subdomains)
+        for allowed in self._allowed_domains:
+            if self._is_subdomain(domain, allowed):
+                self._update_stats(True)
+                return True
+
+        # No matches found
+        self._update_stats(False)
+        return False
 
 
 class ContentRelevanceFilter(URLFilter):
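The decision order in the rewritten `DomainFilter.apply` matters: block-list first (including subdomains), then an open allow-list if none was configured, then the allow-list with subdomain matching. A condensed sketch of that ordering, with illustrative names:

```python
def domain_allowed(domain, blocked, allowed, is_sub):
    """Allow/block decision mirroring the order in the hunk above."""
    # 1. Blocked domains win first, including their subdomains
    if any(is_sub(domain, b) for b in blocked):
        return False
    # 2. With no allow-list configured, accept everything not blocked
    if allowed is None:
        return True
    # 3. Otherwise require a match against some allowed domain (or subdomain)
    return any(is_sub(domain, a) for a in allowed)
```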
@@ -4,12 +4,10 @@ from typing import Any, List, Dict, Optional
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import json
 import time
-import os
 
 from .prompts import PROMPT_EXTRACT_BLOCKS, PROMPT_EXTRACT_BLOCKS_WITH_INSTRUCTION, PROMPT_EXTRACT_SCHEMA_WITH_INSTRUCTION, JSON_SCHEMA_BUILDER_XPATH
 from .config import (
-    DEFAULT_PROVIDER, PROVIDER_MODELS,
-    CHUNK_TOKEN_THRESHOLD,
+    DEFAULT_PROVIDER, CHUNK_TOKEN_THRESHOLD,
     OVERLAP_RATE,
     WORD_TOKEN_RATE,
 )
@@ -22,9 +20,7 @@ from .utils import (
     extract_xml_data,
     split_and_parse_json_objects,
     sanitize_input_encode,
-    chunk_documents,
     merge_chunks,
-    advanced_split,
 )
 from .models import *  # noqa: F403
 
@@ -38,8 +34,9 @@ from .model_loader import (
     calculate_batch_size
 )
 
+from .types import LLMConfig
+
 from functools import partial
-import math
 import numpy as np
 import re
 from bs4 import BeautifulSoup
@@ -481,8 +478,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
     A strategy that uses an LLM to extract meaningful content from the HTML.
 
     Attributes:
-        provider: The provider to use for extraction. It follows the format <provider_name>/<model_name>, e.g., "ollama/llama3.3".
-        api_token: The API token for the provider.
+        llm_config: The LLM configuration object.
         instruction: The instruction to use for the LLM model.
         schema: Pydantic model schema for structured data.
         extraction_type: "block" or "schema".
@@ -490,27 +486,20 @@ class LLMExtractionStrategy(ExtractionStrategy):
         overlap_rate: Overlap between chunks.
         word_token_rate: Word to token conversion rate.
         apply_chunking: Whether to apply chunking.
-        base_url: The base URL for the API request.
-        api_base: The base URL for the API request.
-        extra_args: Additional arguments for the API request, such as temprature, max_tokens, etc.
         verbose: Whether to print verbose output.
         usages: List of individual token usages.
         total_usage: Accumulated token usage.
     """
     _UNWANTED_PROPS = {
-        'provider' : 'Instead, use llmConfig=LlmConfig(provider="...")',
-        'api_token' : 'Instead, use llmConfig=LlMConfig(api_token="...")',
-        'base_url' : 'Instead, use llmConfig=LlmConfig(base_url="...")',
-        'api_base' : 'Instead, use llmConfig=LlmConfig(base_url="...")',
+        'provider' : 'Instead, use llm_config=LLMConfig(provider="...")',
+        'api_token' : 'Instead, use llm_config=LlMConfig(api_token="...")',
+        'base_url' : 'Instead, use llm_config=LLMConfig(base_url="...")',
+        'api_base' : 'Instead, use llm_config=LLMConfig(base_url="...")',
     }
     def __init__(
         self,
-        llmConfig: 'LLMConfig' = None,
+        llm_config: 'LLMConfig' = None,
         instruction: str = None,
-        provider: str = DEFAULT_PROVIDER,
-        api_token: Optional[str] = None,
-        base_url: str = None,
-        api_base: str = None,
         schema: Dict = None,
         extraction_type="block",
         chunk_token_threshold=CHUNK_TOKEN_THRESHOLD,
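The `_UNWANTED_PROPS` table above maps each deprecated constructor argument to a migration hint pointing at the new `llm_config` object. One way such a table can be enforced (a hypothetical sketch, not the strategy's actual implementation, which the later hunks wire through `__setattr__`) is to intercept attribute access and surface the hint:

```python
class DeprecatedProps:
    """Steer callers from deprecated attributes toward a config object."""

    # Same shape as _UNWANTED_PROPS in the diff: name -> migration hint
    _UNWANTED_PROPS = {
        "provider": 'Instead, use llm_config=LLMConfig(provider="...")',
    }

    def __getattr__(self, name):
        # Only called when normal lookup fails, so real attributes are unaffected
        if name in self._UNWANTED_PROPS:
            raise AttributeError(f"'{name}' is deprecated. {self._UNWANTED_PROPS[name]}")
        raise AttributeError(name)
```

Reading `obj.provider` then fails with the migration hint instead of a bare `AttributeError`.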
@@ -519,15 +508,18 @@ class LLMExtractionStrategy(ExtractionStrategy):
         apply_chunking=True,
         input_format: str = "markdown",
         verbose=False,
+        # Deprecated arguments
+        provider: str = DEFAULT_PROVIDER,
+        api_token: Optional[str] = None,
+        base_url: str = None,
+        api_base: str = None,
         **kwargs,
     ):
         """
         Initialize the strategy with clustering parameters.
 
         Args:
-            llmConfig: The LLM configuration object.
-            provider: The provider to use for extraction. It follows the format <provider_name>/<model_name>, e.g., "ollama/llama3.3".
-            api_token: The API token for the provider.
+            llm_config: The LLM configuration object.
             instruction: The instruction to use for the LLM model.
             schema: Pydantic model schema for structured data.
             extraction_type: "block" or "schema".
@@ -535,20 +527,19 @@ class LLMExtractionStrategy(ExtractionStrategy):
             overlap_rate: Overlap between chunks.
             word_token_rate: Word to token conversion rate.
             apply_chunking: Whether to apply chunking.
-            base_url: The base URL for the API request.
-            api_base: The base URL for the API request.
-            extra_args: Additional arguments for the API request, such as temprature, max_tokens, etc.
             verbose: Whether to print verbose output.
             usages: List of individual token usages.
             total_usage: Accumulated token usage.
 
+            # Deprecated arguments, will be removed very soon
+            provider: The provider to use for extraction. It follows the format <provider_name>/<model_name>, e.g., "ollama/llama3.3".
+            api_token: The API token for the provider.
+            base_url: The base URL for the API request.
+            api_base: The base URL for the API request.
+            extra_args: Additional arguments for the API request, such as temprature, max_tokens, etc.
         """
         super().__init__( input_format=input_format, **kwargs)
-        self.llmConfig = llmConfig
-        self.provider = provider
-        self.api_token = api_token
-        self.base_url = base_url
-        self.api_base = api_base
+        self.llm_config = llm_config
         self.instruction = instruction
         self.extract_type = extraction_type
         self.schema = schema
@@ -565,6 +556,11 @@ class LLMExtractionStrategy(ExtractionStrategy):
         self.usages = []  # Store individual usages
         self.total_usage = TokenUsage()  # Accumulated usage
 
+        self.provider = provider
+        self.api_token = api_token
+        self.base_url = base_url
+        self.api_base = api_base
+
 
     def __setattr__(self, name, value):
         """Handle attribute setting."""
@@ -618,10 +614,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
         )
 
         response = perform_completion_with_backoff(
-            self.llmConfig.provider,
+            self.llm_config.provider,
             prompt_with_variables,
-            self.llmConfig.api_token,
-            base_url=self.llmConfig.base_url,
+            self.llm_config.api_token,
+            base_url=self.llm_config.base_url,
             extra_args=self.extra_args,
         )  # , json_response=self.extract_type == "schema")
         # Track usage
@@ -701,7 +697,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
             overlap=int(self.chunk_token_threshold * self.overlap_rate),
         )
         extracted_content = []
-        if self.llmConfig.provider.startswith("groq/"):
+        if self.llm_config.provider.startswith("groq/"):
             # Sequential processing with a delay
             for ix, section in enumerate(merged_sections):
                 extract_func = partial(self.extract, url)
@@ -1043,8 +1039,8 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
         pass
 
     _GENERATE_SCHEMA_UNWANTED_PROPS = {
-        'provider': 'Instead, use llmConfig=LlmConfig(provider="...")',
-        'api_token': 'Instead, use llmConfig=LlMConfig(api_token="...")',
+        'provider': 'Instead, use llm_config=LLMConfig(provider="...")',
+        'api_token': 'Instead, use llm_config=LlMConfig(api_token="...")',
     }
 
     @staticmethod
@@ -1053,7 +1049,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
         schema_type: str = "CSS", # or XPATH
         query: str = None,
         target_json_example: str = None,
-        llmConfig: 'LLMConfig' = None,
+        llm_config: 'LLMConfig' = None,
         provider: str = None,
         api_token: str = None,
         **kwargs
@@ -1066,9 +1062,9 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
             query (str, optional): Natural language description of what data to extract
             provider (str): Legacy Parameter. LLM provider to use
             api_token (str): Legacy Parameter. API token for LLM provider
-            llmConfig (LlmConfig): LLM configuration object
+            llm_config (LLMConfig): LLM configuration object
             prompt (str, optional): Custom prompt template to use
-            **kwargs: Additional args passed to perform_completion_with_backoff
+            **kwargs: Additional args passed to LLM processor
 
         Returns:
             dict: Generated schema following the JsonElementExtractionStrategy format
@@ -1130,11 +1126,12 @@ In this scenario, use your best judgment to generate the schema. Try to maximize
         try:
             # Call LLM with backoff handling
             response = perform_completion_with_backoff(
-                provider=llmConfig.provider,
+                provider=llm_config.provider,
                 prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]),
                 json_response = True,
-                api_token=llmConfig.api_token,
-                **kwargs
+                api_token=llm_config.api_token,
+                base_url=llm_config.base_url,
+                extra_args=kwargs
             )
 
             # Extract and return schema
@@ -1171,7 +1168,8 @@ class JsonCssExtractionStrategy(JsonElementExtractionStrategy):
         super().__init__(schema, **kwargs)
 
     def _parse_html(self, html_content: str):
-        return BeautifulSoup(html_content, "html.parser")
+        # return BeautifulSoup(html_content, "html.parser")
+        return BeautifulSoup(html_content, "lxml")
 
     def _get_base_elements(self, parsed_html, selector: str):
         return parsed_html.select(selector)
@@ -1190,6 +1188,373 @@ class JsonCssExtractionStrategy(JsonElementExtractionStrategy):
     def _get_element_attribute(self, element, attribute: str):
         return element.get(attribute)
 
+class JsonLxmlExtractionStrategy(JsonElementExtractionStrategy):
+    def __init__(self, schema: Dict[str, Any], **kwargs):
+        kwargs["input_format"] = "html"
+        super().__init__(schema, **kwargs)
+        self._selector_cache = {}
+        self._xpath_cache = {}
+        self._result_cache = {}
+
+        # Control selector optimization strategy
+        self.use_caching = kwargs.get("use_caching", True)
+        self.optimize_common_patterns = kwargs.get("optimize_common_patterns", True)
+
+        # Load lxml dependencies once
+        from lxml import etree, html
+        from lxml.cssselect import CSSSelector
+        self.etree = etree
+        self.html_parser = html
+        self.CSSSelector = CSSSelector
+
+    def _parse_html(self, html_content: str):
+        """Parse HTML content with error recovery"""
+        try:
+            parser = self.etree.HTMLParser(recover=True, remove_blank_text=True)
+            return self.etree.fromstring(html_content, parser)
+        except Exception as e:
+            if self.verbose:
+                print(f"Error parsing HTML, falling back to alternative method: {e}")
+            try:
+                return self.html_parser.fromstring(html_content)
+            except Exception as e2:
+                if self.verbose:
+                    print(f"Critical error parsing HTML: {e2}")
+                # Create minimal document as fallback
+                return self.etree.Element("html")
+
+    def _optimize_selector(self, selector_str):
+        """Optimize common selector patterns for better performance"""
+        if not self.optimize_common_patterns:
+            return selector_str
+
+        # Handle td:nth-child(N) pattern which is very common in table scraping
+        import re
+        if re.search(r'td:nth-child\(\d+\)', selector_str):
+            return selector_str  # Already handled specially in _apply_selector
+
+        # Split complex selectors into parts for optimization
+        parts = selector_str.split()
+        if len(parts) <= 1:
+            return selector_str
+
+        # For very long selectors, consider using just the last specific part
+        if len(parts) > 3 and any(p.startswith('.') or p.startswith('#') for p in parts):
+            specific_parts = [p for p in parts if p.startswith('.') or p.startswith('#')]
+            if specific_parts:
+                return specific_parts[-1]  # Use most specific class/id selector
+
+        return selector_str
+
+    def _create_selector_function(self, selector_str):
+        """Create a selector function that handles all edge cases"""
+        original_selector = selector_str
+
+        # Try to optimize the selector if appropriate
+        if self.optimize_common_patterns:
+            selector_str = self._optimize_selector(selector_str)
+
+        try:
+            # Attempt to compile the CSS selector
+            compiled = self.CSSSelector(selector_str)
+            xpath = compiled.path
+
+            # Store XPath for later use
+            self._xpath_cache[selector_str] = xpath
+
+            # Create the wrapper function that implements the selection strategy
+            def selector_func(element, context_sensitive=True):
+                cache_key = None
+
+                # Use result caching if enabled
+                if self.use_caching:
+                    # Create a cache key based on element and selector
+                    element_id = element.get('id', '') or str(hash(element))
+                    cache_key = f"{element_id}::{selector_str}"
+
+                    if cache_key in self._result_cache:
+                        return self._result_cache[cache_key]
+
+                results = []
+                try:
+                    # Strategy 1: Direct CSS selector application (fastest)
+                    results = compiled(element)
+
+                    # If that fails and we need context sensitivity
+                    if not results and context_sensitive:
+                        # Strategy 2: Try XPath with context adjustment
+                        context_xpath = self._make_context_sensitive_xpath(xpath, element)
+                        if context_xpath:
+                            results = element.xpath(context_xpath)
+
+                        # Strategy 3: Handle special case - nth-child
+                        if not results and 'nth-child' in original_selector:
+                            results = self._handle_nth_child_selector(element, original_selector)
+
+                        # Strategy 4: Direct descendant search for class/ID selectors
+                        if not results:
+                            results = self._fallback_class_id_search(element, original_selector)
+
+                        # Strategy 5: Last resort - tag name search for the final part
+                        if not results:
+                            parts = original_selector.split()
+                            if parts:
+                                last_part = parts[-1]
+                                # Extract tag name from the selector
+                                tag_match = re.match(r'^(\w+)', last_part)
+                                if tag_match:
+                                    tag_name = tag_match.group(1)
+                                    results = element.xpath(f".//{tag_name}")
+
+                    # Cache results if caching is enabled
+                    if self.use_caching and cache_key:
+                        self._result_cache[cache_key] = results
+
+                except Exception as e:
+                    if self.verbose:
+                        print(f"Error applying selector '{selector_str}': {e}")
+
+                return results
+
+            return selector_func
+
+        except Exception as e:
+            if self.verbose:
+                print(f"Error compiling selector '{selector_str}': {e}")
+
+            # Fallback function for invalid selectors
+            return lambda element, context_sensitive=True: []
+
+    def _make_context_sensitive_xpath(self, xpath, element):
+        """Convert absolute XPath to context-sensitive XPath"""
+        try:
+            # If starts with descendant-or-self, it's already context-sensitive
+            if xpath.startswith('descendant-or-self::'):
+                return xpath
+
+            # Remove leading slash if present
+            if xpath.startswith('/'):
+                context_xpath = f".{xpath}"
+            else:
+                context_xpath = f".//{xpath}"
+
+            # Validate the XPath by trying it
+            try:
+                element.xpath(context_xpath)
+                return context_xpath
+            except:
+                # If that fails, try a simpler descendant search
+                return f".//{xpath.split('/')[-1]}"
+        except:
+            return None
+
+    def _handle_nth_child_selector(self, element, selector_str):
+        """Special handling for nth-child selectors in tables"""
+        import re
+        results = []
+
+        try:
+            # Extract the column number from td:nth-child(N)
+            match = re.search(r'td:nth-child\((\d+)\)', selector_str)
+            if match:
+                col_num = match.group(1)
+
+                # Check if there's content after the nth-child part
+                remaining_selector = selector_str.split(f"td:nth-child({col_num})", 1)[-1].strip()
+
+                if remaining_selector:
+                    # If there's a specific element we're looking for after the column
+                    # Extract any tag names from the remaining selector
+                    tag_match = re.search(r'(\w+)', remaining_selector)
+                    tag_name = tag_match.group(1) if tag_match else '*'
+                    results = element.xpath(f".//td[{col_num}]//{tag_name}")
+                else:
+                    # Just get the column cell
+                    results = element.xpath(f".//td[{col_num}]")
+        except Exception as e:
+            if self.verbose:
+                print(f"Error handling nth-child selector: {e}")
+
+        return results
+
+    def _fallback_class_id_search(self, element, selector_str):
|
||||||
|
"""Fallback to search by class or ID"""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Extract class selectors (.classname)
|
||||||
|
import re
|
||||||
|
class_matches = re.findall(r'\.([a-zA-Z0-9_-]+)', selector_str)
|
||||||
|
|
||||||
|
# Extract ID selectors (#idname)
|
||||||
|
id_matches = re.findall(r'#([a-zA-Z0-9_-]+)', selector_str)
|
||||||
|
|
||||||
|
# Try each class
|
||||||
|
for class_name in class_matches:
|
||||||
|
class_results = element.xpath(f".//*[contains(@class, '{class_name}')]")
|
||||||
|
results.extend(class_results)
|
||||||
|
|
||||||
|
# Try each ID (usually more specific)
|
||||||
|
for id_name in id_matches:
|
||||||
|
id_results = element.xpath(f".//*[@id='{id_name}']")
|
||||||
|
results.extend(id_results)
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"Error in fallback class/id search: {e}")
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _get_selector(self, selector_str):
|
||||||
|
"""Get or create a selector function with caching"""
|
||||||
|
if selector_str not in self._selector_cache:
|
||||||
|
self._selector_cache[selector_str] = self._create_selector_function(selector_str)
|
||||||
|
return self._selector_cache[selector_str]
|
||||||
|
|
||||||
|
def _get_base_elements(self, parsed_html, selector: str):
|
||||||
|
"""Get all base elements using the selector"""
|
||||||
|
selector_func = self._get_selector(selector)
|
||||||
|
# For base elements, we don't need context sensitivity
|
||||||
|
return selector_func(parsed_html, context_sensitive=False)
|
||||||
|
|
||||||
|
def _get_elements(self, element, selector: str):
|
||||||
|
"""Get child elements using the selector with context sensitivity"""
|
||||||
|
selector_func = self._get_selector(selector)
|
||||||
|
return selector_func(element, context_sensitive=True)
|
||||||
|
|
||||||
|
def _get_element_text(self, element) -> str:
|
||||||
|
"""Extract normalized text from element"""
|
||||||
|
try:
|
||||||
|
# Get all text nodes and normalize
|
||||||
|
text = " ".join(t.strip() for t in element.xpath(".//text()") if t.strip())
|
||||||
|
return text
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"Error extracting text: {e}")
|
||||||
|
# Fallback
|
||||||
|
try:
|
||||||
|
return element.text_content().strip()
|
||||||
|
except:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _get_element_html(self, element) -> str:
|
||||||
|
"""Get HTML string representation of element"""
|
||||||
|
try:
|
||||||
|
return self.etree.tostring(element, encoding='unicode', method='html')
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"Error serializing HTML: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _get_element_attribute(self, element, attribute: str):
|
||||||
|
"""Get attribute value safely"""
|
||||||
|
try:
|
||||||
|
return element.get(attribute)
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"Error getting attribute '{attribute}': {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _clear_caches(self):
|
||||||
|
"""Clear caches to free memory"""
|
||||||
|
if self.use_caching:
|
||||||
|
self._result_cache.clear()
|
||||||
|
|
||||||
|
class JsonLxmlExtractionStrategy_naive(JsonElementExtractionStrategy):
|
||||||
|
def __init__(self, schema: Dict[str, Any], **kwargs):
|
||||||
|
kwargs["input_format"] = "html" # Force HTML input
|
||||||
|
super().__init__(schema, **kwargs)
|
||||||
|
self._selector_cache = {}
|
||||||
|
|
||||||
|
def _parse_html(self, html_content: str):
|
||||||
|
from lxml import etree
|
||||||
|
parser = etree.HTMLParser(recover=True)
|
||||||
|
return etree.fromstring(html_content, parser)
|
||||||
|
|
||||||
|
def _get_selector(self, selector_str):
|
||||||
|
"""Get a selector function that works within the context of an element"""
|
||||||
|
if selector_str not in self._selector_cache:
|
||||||
|
from lxml.cssselect import CSSSelector
|
||||||
|
try:
|
||||||
|
# Store both the compiled selector and its xpath translation
|
||||||
|
compiled = CSSSelector(selector_str)
|
||||||
|
|
||||||
|
# Create a function that will apply this selector appropriately
|
||||||
|
def select_func(element):
|
||||||
|
try:
|
||||||
|
# First attempt: direct CSS selector application
|
||||||
|
results = compiled(element)
|
||||||
|
if results:
|
||||||
|
return results
|
||||||
|
|
||||||
|
# Second attempt: contextual XPath selection
|
||||||
|
# Convert the root-based XPath to a context-based XPath
|
||||||
|
xpath = compiled.path
|
||||||
|
|
||||||
|
# If the XPath already starts with descendant-or-self, handle it specially
|
||||||
|
if xpath.startswith('descendant-or-self::'):
|
||||||
|
context_xpath = xpath
|
||||||
|
else:
|
||||||
|
# For normal XPath expressions, make them relative to current context
|
||||||
|
context_xpath = f"./{xpath.lstrip('/')}"
|
||||||
|
|
||||||
|
results = element.xpath(context_xpath)
|
||||||
|
if results:
|
||||||
|
return results
|
||||||
|
|
||||||
|
# Final fallback: simple descendant search for common patterns
|
||||||
|
if 'nth-child' in selector_str:
|
||||||
|
# Handle td:nth-child(N) pattern
|
||||||
|
import re
|
||||||
|
match = re.search(r'td:nth-child\((\d+)\)', selector_str)
|
||||||
|
if match:
|
||||||
|
col_num = match.group(1)
|
||||||
|
sub_selector = selector_str.split(')', 1)[-1].strip()
|
||||||
|
if sub_selector:
|
||||||
|
return element.xpath(f".//td[{col_num}]//{sub_selector}")
|
||||||
|
else:
|
||||||
|
return element.xpath(f".//td[{col_num}]")
|
||||||
|
|
||||||
|
# Last resort: try each part of the selector separately
|
||||||
|
parts = selector_str.split()
|
||||||
|
if len(parts) > 1 and parts[-1]:
|
||||||
|
return element.xpath(f".//{parts[-1]}")
|
||||||
|
|
||||||
|
return []
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"Error applying selector '{selector_str}': {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
self._selector_cache[selector_str] = select_func
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"Error compiling selector '{selector_str}': {e}")
|
||||||
|
|
||||||
|
# Fallback function for invalid selectors
|
||||||
|
def fallback_func(element):
|
||||||
|
return []
|
||||||
|
|
||||||
|
self._selector_cache[selector_str] = fallback_func
|
||||||
|
|
||||||
|
return self._selector_cache[selector_str]
|
||||||
|
|
||||||
|
def _get_base_elements(self, parsed_html, selector: str):
|
||||||
|
selector_func = self._get_selector(selector)
|
||||||
|
return selector_func(parsed_html)
|
||||||
|
|
||||||
|
def _get_elements(self, element, selector: str):
|
||||||
|
selector_func = self._get_selector(selector)
|
||||||
|
return selector_func(element)
|
||||||
|
|
||||||
|
def _get_element_text(self, element) -> str:
|
||||||
|
return "".join(element.xpath(".//text()")).strip()
|
||||||
|
|
||||||
|
def _get_element_html(self, element) -> str:
|
||||||
|
from lxml import etree
|
||||||
|
return etree.tostring(element, encoding='unicode')
|
||||||
|
|
||||||
|
def _get_element_attribute(self, element, attribute: str):
|
||||||
|
return element.get(attribute)
|
||||||
|
|
||||||
class JsonXPathExtractionStrategy(JsonElementExtractionStrategy):
|
class JsonXPathExtractionStrategy(JsonElementExtractionStrategy):
|
||||||
"""
|
"""
|
||||||
|
|||||||
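The nth-child fallback above reduces to a small regex translation from a CSS table selector to an XPath column index. A minimal standalone sketch of that translation, mirroring `_handle_nth_child_selector` (the example selectors are hypothetical; only the standard library is used):

```python
import re
from typing import Optional

def nth_child_to_xpath(selector: str) -> Optional[str]:
    """Translate a td:nth-child(N) CSS selector into a relative XPath,
    mirroring the fallback logic in _handle_nth_child_selector."""
    match = re.search(r'td:nth-child\((\d+)\)', selector)
    if not match:
        return None
    col_num = match.group(1)
    # Anything after the nth-child() part narrows the search to a tag inside the cell
    remaining = selector.split(f"td:nth-child({col_num})", 1)[-1].strip()
    if remaining:
        tag_match = re.search(r'(\w+)', remaining)
        tag_name = tag_match.group(1) if tag_match else '*'
        return f".//td[{col_num}]//{tag_name}"
    return f".//td[{col_num}]"

print(nth_child_to_xpath("td:nth-child(3) a"))  # .//td[3]//a
print(nth_child_to_xpath("td:nth-child(2)"))    # .//td[2]
```

The real strategy passes the resulting expression to `element.xpath(...)` on an lxml element; this sketch only shows the string translation.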
@@ -1,8 +1,8 @@
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, Tuple
from .models import MarkdownGenerationResult
from .html2text import CustomHTML2Text
# from .types import RelevantContentFilter
from .content_filter_strategy import RelevantContentFilter
import re
from urllib.parse import urljoin
@@ -37,13 +37,33 @@ class CrawlStatus(Enum):
    FAILED = "FAILED"


# @dataclass
# class CrawlStats:
#     task_id: str
#     url: str
#     status: CrawlStatus
#     start_time: Optional[datetime] = None
#     end_time: Optional[datetime] = None
#     memory_usage: float = 0.0
#     peak_memory: float = 0.0
#     error_message: str = ""

#     @property
#     def duration(self) -> str:
#         if not self.start_time:
#             return "0:00"
#         end = self.end_time or datetime.now()
#         duration = end - self.start_time
#         return str(timedelta(seconds=int(duration.total_seconds())))


@dataclass
class CrawlStats:
    task_id: str
    url: str
    status: CrawlStatus
    start_time: Optional[Union[datetime, float]] = None
    end_time: Optional[Union[datetime, float]] = None
    memory_usage: float = 0.0
    peak_memory: float = 0.0
    error_message: str = ""

@@ -52,11 +72,21 @@ class CrawlStats:
    def duration(self) -> str:
        if not self.start_time:
            return "0:00"

        # Convert start_time to datetime if it's a float
        start = self.start_time
        if isinstance(start, float):
            start = datetime.fromtimestamp(start)

        # Get end time or use current time
        end = self.end_time or datetime.now()
        # Convert end_time to datetime if it's a float
        if isinstance(end, float):
            end = datetime.fromtimestamp(end)

        duration = end - start
        return str(timedelta(seconds=int(duration.total_seconds())))


class DisplayMode(Enum):
    DETAILED = "DETAILED"
    AGGREGATED = "AGGREGATED"
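The `duration` fix above normalizes both endpoints before subtracting, since `start_time` and `end_time` may now be either `datetime` objects or epoch floats. A standalone sketch of the same conversion logic (the function name is illustrative, not part of crawl4ai):

```python
from datetime import datetime, timedelta

def duration_str(start_time, end_time=None) -> str:
    """Compute an H:MM:SS-style duration string, accepting datetimes or
    epoch floats, mirroring the CrawlStats.duration fix."""
    if not start_time:
        return "0:00"
    # Coerce a float timestamp into a datetime before doing arithmetic
    start = datetime.fromtimestamp(start_time) if isinstance(start_time, float) else start_time
    end = end_time or datetime.now()
    if isinstance(end, float):
        end = datetime.fromtimestamp(end)
    duration = end - start
    return str(timedelta(seconds=int(duration.total_seconds())))

print(duration_str(datetime(2025, 3, 2, 10, 0, 0),
                   datetime(2025, 3, 2, 10, 1, 30)))  # 0:01:30
```

Without the coercion, mixing a `float` start with a `datetime` end raises a `TypeError` on subtraction, which is exactly the bug the diff addresses.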
@@ -149,7 +179,11 @@ class CrawlResult(BaseModel):
        markdown_result = data.pop('markdown', None)
        super().__init__(**data)
        if markdown_result is not None:
            self._markdown = (
                MarkdownGenerationResult(**markdown_result)
                if isinstance(markdown_result, dict)
                else markdown_result
            )

    @property
    def markdown(self):
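The markdown fix above coerces a raw dict back into a `MarkdownGenerationResult` when a result is deserialized (e.g. after a JSON round-trip). The coercion pattern is easy to show standalone; the dataclass below is a stand-in for illustration, not crawl4ai's actual Pydantic model:

```python
from dataclasses import dataclass
from typing import Optional, Union

@dataclass
class MarkdownResult:  # stand-in for MarkdownGenerationResult
    raw_markdown: str = ""
    fit_markdown: str = ""

def coerce_markdown(value: Union[dict, MarkdownResult, None]) -> Optional[MarkdownResult]:
    """Rebuild the model from a plain dict, pass through an
    already-constructed instance, leave None alone."""
    if value is None:
        return None
    return MarkdownResult(**value) if isinstance(value, dict) else value

restored = coerce_markdown({"raw_markdown": "# Title"})
print(restored.raw_markdown)  # # Title
```

This keeps attribute access like `result.markdown.raw_markdown` working whether the value came from a live crawl or from cached/serialized data.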
@@ -292,6 +326,7 @@ class Media(BaseModel):
    audios: List[
        MediaItem
    ] = []  # Using MediaItem model for now, can be extended with Audio model if needed
    tables: List[Dict] = []  # Table data extracted from HTML tables


class Links(BaseModel):
@@ -1,8 +1,119 @@
from typing import List, Dict, Optional
from abc import ABC, abstractmethod
from itertools import cycle
import os


class ProxyConfig:
    def __init__(
        self,
        server: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        ip: Optional[str] = None,
    ):
        """Configuration class for a single proxy.

        Args:
            server: Proxy server URL (e.g., "http://127.0.0.1:8080")
            username: Optional username for proxy authentication
            password: Optional password for proxy authentication
            ip: Optional IP address for verification purposes
        """
        self.server = server
        self.username = username
        self.password = password

        # Extract IP from server if not explicitly provided
        self.ip = ip or self._extract_ip_from_server()

    def _extract_ip_from_server(self) -> Optional[str]:
        """Extract IP address from server URL."""
        try:
            # Simple extraction assuming http://ip:port format
            if "://" in self.server:
                parts = self.server.split("://")[1].split(":")
                return parts[0]
            else:
                parts = self.server.split(":")
                return parts[0]
        except Exception:
            return None

    @staticmethod
    def from_string(proxy_str: str) -> "ProxyConfig":
        """Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
        parts = proxy_str.split(":")
        if len(parts) == 4:  # ip:port:username:password
            ip, port, username, password = parts
            return ProxyConfig(
                server=f"http://{ip}:{port}",
                username=username,
                password=password,
                ip=ip
            )
        elif len(parts) == 2:  # ip:port only
            ip, port = parts
            return ProxyConfig(
                server=f"http://{ip}:{port}",
                ip=ip
            )
        else:
            raise ValueError(f"Invalid proxy string format: {proxy_str}")

    @staticmethod
    def from_dict(proxy_dict: Dict) -> "ProxyConfig":
        """Create a ProxyConfig from a dictionary."""
        return ProxyConfig(
            server=proxy_dict.get("server"),
            username=proxy_dict.get("username"),
            password=proxy_dict.get("password"),
            ip=proxy_dict.get("ip")
        )

    @staticmethod
    def from_env(env_var: str = "PROXIES") -> List["ProxyConfig"]:
        """Load proxies from environment variable.

        Args:
            env_var: Name of environment variable containing comma-separated proxy strings

        Returns:
            List of ProxyConfig objects
        """
        proxies = []
        try:
            proxy_list = os.getenv(env_var, "").split(",")
            for proxy in proxy_list:
                if not proxy:
                    continue
                proxies.append(ProxyConfig.from_string(proxy))
        except Exception as e:
            print(f"Error loading proxies from environment: {e}")
        return proxies

    def to_dict(self) -> Dict:
        """Convert to dictionary representation."""
        return {
            "server": self.server,
            "username": self.username,
            "password": self.password,
            "ip": self.ip
        }

    def clone(self, **kwargs) -> "ProxyConfig":
        """Create a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            ProxyConfig: A new instance with the specified updates
        """
        config_dict = self.to_dict()
        config_dict.update(kwargs)
        return ProxyConfig.from_dict(config_dict)


class ProxyRotationStrategy(ABC):
    """Base abstract class for proxy rotation strategies"""
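The `from_string` format above ('ip:port:username:password', with a two-part 'ip:port' fallback) can be exercised standalone. This is an illustrative re-implementation of just the parsing step, not the library class itself; the proxy addresses are made up:

```python
from typing import Dict, Optional

def parse_proxy_string(proxy_str: str) -> Dict[str, Optional[str]]:
    """Parse 'ip:port:username:password' or 'ip:port' into the fields
    that ProxyConfig.from_string builds (illustrative sketch)."""
    parts = proxy_str.split(":")
    if len(parts) == 4:
        ip, port, username, password = parts
        return {"server": f"http://{ip}:{port}", "username": username,
                "password": password, "ip": ip}
    if len(parts) == 2:
        ip, port = parts
        return {"server": f"http://{ip}:{port}", "username": None,
                "password": None, "ip": ip}
    # Anything else is malformed, matching the class's ValueError
    raise ValueError(f"Invalid proxy string format: {proxy_str}")

print(parse_proxy_string("10.0.0.5:3128")["server"])  # http://10.0.0.5:3128
```

`from_env` then simply splits the comma-separated `PROXIES` variable and feeds each entry through this same parser.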
@@ -1,14 +1,181 @@
from typing import TYPE_CHECKING, Union

# Logger types
AsyncLoggerBase = Union['AsyncLoggerBaseType']
AsyncLogger = Union['AsyncLoggerType']

# Crawler core types
AsyncWebCrawler = Union['AsyncWebCrawlerType']
CacheMode = Union['CacheModeType']
CrawlResult = Union['CrawlResultType']
CrawlerHub = Union['CrawlerHubType']
BrowserProfiler = Union['BrowserProfilerType']

# Configuration types
BrowserConfig = Union['BrowserConfigType']
CrawlerRunConfig = Union['CrawlerRunConfigType']
HTTPCrawlerConfig = Union['HTTPCrawlerConfigType']
LLMConfig = Union['LLMConfigType']

# Content scraping types
ContentScrapingStrategy = Union['ContentScrapingStrategyType']
WebScrapingStrategy = Union['WebScrapingStrategyType']
LXMLWebScrapingStrategy = Union['LXMLWebScrapingStrategyType']

# Proxy types
ProxyRotationStrategy = Union['ProxyRotationStrategyType']
RoundRobinProxyStrategy = Union['RoundRobinProxyStrategyType']

# Extraction types
ExtractionStrategy = Union['ExtractionStrategyType']
LLMExtractionStrategy = Union['LLMExtractionStrategyType']
CosineStrategy = Union['CosineStrategyType']
JsonCssExtractionStrategy = Union['JsonCssExtractionStrategyType']
JsonXPathExtractionStrategy = Union['JsonXPathExtractionStrategyType']

# Chunking types
ChunkingStrategy = Union['ChunkingStrategyType']
RegexChunking = Union['RegexChunkingType']

# Markdown generation types
DefaultMarkdownGenerator = Union['DefaultMarkdownGeneratorType']
MarkdownGenerationResult = Union['MarkdownGenerationResultType']

# Content filter types
RelevantContentFilter = Union['RelevantContentFilterType']
PruningContentFilter = Union['PruningContentFilterType']
BM25ContentFilter = Union['BM25ContentFilterType']
LLMContentFilter = Union['LLMContentFilterType']

# Dispatcher types
BaseDispatcher = Union['BaseDispatcherType']
MemoryAdaptiveDispatcher = Union['MemoryAdaptiveDispatcherType']
SemaphoreDispatcher = Union['SemaphoreDispatcherType']
RateLimiter = Union['RateLimiterType']
CrawlerMonitor = Union['CrawlerMonitorType']
DisplayMode = Union['DisplayModeType']
RunManyReturn = Union['RunManyReturnType']

# Docker client
Crawl4aiDockerClient = Union['Crawl4aiDockerClientType']

# Deep crawling types
DeepCrawlStrategy = Union['DeepCrawlStrategyType']
BFSDeepCrawlStrategy = Union['BFSDeepCrawlStrategyType']
FilterChain = Union['FilterChainType']
ContentTypeFilter = Union['ContentTypeFilterType']
DomainFilter = Union['DomainFilterType']
URLFilter = Union['URLFilterType']
FilterStats = Union['FilterStatsType']
SEOFilter = Union['SEOFilterType']
KeywordRelevanceScorer = Union['KeywordRelevanceScorerType']
URLScorer = Union['URLScorerType']
CompositeScorer = Union['CompositeScorerType']
DomainAuthorityScorer = Union['DomainAuthorityScorerType']
FreshnessScorer = Union['FreshnessScorerType']
PathDepthScorer = Union['PathDepthScorerType']
BestFirstCrawlingStrategy = Union['BestFirstCrawlingStrategyType']
DFSDeepCrawlStrategy = Union['DFSDeepCrawlStrategyType']
DeepCrawlDecorator = Union['DeepCrawlDecoratorType']

# Only import types during type checking to avoid circular imports
if TYPE_CHECKING:
    # Logger imports
    from .async_logger import (
        AsyncLoggerBase as AsyncLoggerBaseType,
        AsyncLogger as AsyncLoggerType,
    )

    # Crawler core imports
    from .async_webcrawler import (
        AsyncWebCrawler as AsyncWebCrawlerType,
        CacheMode as CacheModeType,
    )
    from .models import CrawlResult as CrawlResultType
    from .hub import CrawlerHub as CrawlerHubType
    from .browser_profiler import BrowserProfiler as BrowserProfilerType

    # Configuration imports
    from .async_configs import (
        BrowserConfig as BrowserConfigType,
        CrawlerRunConfig as CrawlerRunConfigType,
        HTTPCrawlerConfig as HTTPCrawlerConfigType,
        LLMConfig as LLMConfigType,
    )

    # Content scraping imports
    from .content_scraping_strategy import (
        ContentScrapingStrategy as ContentScrapingStrategyType,
        WebScrapingStrategy as WebScrapingStrategyType,
        LXMLWebScrapingStrategy as LXMLWebScrapingStrategyType,
    )

    # Proxy imports
    from .proxy_strategy import (
        ProxyRotationStrategy as ProxyRotationStrategyType,
        RoundRobinProxyStrategy as RoundRobinProxyStrategyType,
    )

    # Extraction imports
    from .extraction_strategy import (
        ExtractionStrategy as ExtractionStrategyType,
        LLMExtractionStrategy as LLMExtractionStrategyType,
        CosineStrategy as CosineStrategyType,
        JsonCssExtractionStrategy as JsonCssExtractionStrategyType,
        JsonXPathExtractionStrategy as JsonXPathExtractionStrategyType,
    )

    # Chunking imports
    from .chunking_strategy import (
        ChunkingStrategy as ChunkingStrategyType,
        RegexChunking as RegexChunkingType,
    )

    # Markdown generation imports
    from .markdown_generation_strategy import (
        DefaultMarkdownGenerator as DefaultMarkdownGeneratorType,
    )
    from .models import MarkdownGenerationResult as MarkdownGenerationResultType

    # Content filter imports
    from .content_filter_strategy import (
        RelevantContentFilter as RelevantContentFilterType,
        PruningContentFilter as PruningContentFilterType,
        BM25ContentFilter as BM25ContentFilterType,
        LLMContentFilter as LLMContentFilterType,
    )

    # Dispatcher imports
    from .async_dispatcher import (
        BaseDispatcher as BaseDispatcherType,
        MemoryAdaptiveDispatcher as MemoryAdaptiveDispatcherType,
        SemaphoreDispatcher as SemaphoreDispatcherType,
        RateLimiter as RateLimiterType,
        CrawlerMonitor as CrawlerMonitorType,
        DisplayMode as DisplayModeType,
        RunManyReturn as RunManyReturnType,
    )

    # Docker client
    from .docker_client import Crawl4aiDockerClient as Crawl4aiDockerClientType

    # Deep crawling imports
    from .deep_crawling import (
        DeepCrawlStrategy as DeepCrawlStrategyType,
        BFSDeepCrawlStrategy as BFSDeepCrawlStrategyType,
        FilterChain as FilterChainType,
        ContentTypeFilter as ContentTypeFilterType,
        DomainFilter as DomainFilterType,
        URLFilter as URLFilterType,
        FilterStats as FilterStatsType,
        SEOFilter as SEOFilterType,
        KeywordRelevanceScorer as KeywordRelevanceScorerType,
        URLScorer as URLScorerType,
        CompositeScorer as CompositeScorerType,
        DomainAuthorityScorer as DomainAuthorityScorerType,
        FreshnessScorer as FreshnessScorerType,
        PathDepthScorer as PathDepthScorerType,
        BestFirstCrawlingStrategy as BestFirstCrawlingStrategyType,
        DFSDeepCrawlStrategy as DFSDeepCrawlStrategyType,
        DeepCrawlDecorator as DeepCrawlDecoratorType,
    )
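The alias pattern above (`X = Union['XType']` with the real imports guarded by `TYPE_CHECKING`) gives editors and type checkers concrete types while avoiding circular imports at runtime. A minimal self-contained sketch of the same pattern; the module name in the guarded import is hypothetical:

```python
from typing import TYPE_CHECKING, Union

# At runtime this stays an unresolved forward reference; nothing is imported.
Crawler = Union['CrawlerType']

if TYPE_CHECKING:
    # Evaluated only by type checkers, never at runtime, so a circular
    # or heavy import here costs nothing when the program executes.
    from some_heavy_module import Crawler as CrawlerType  # hypothetical

def run(crawler: "Crawler") -> str:
    # The string annotation is never evaluated at call time
    return type(crawler).__name__

print(run(object()))  # object
```

The trade-off is that these aliases are usable only as annotations, not for `isinstance` checks, which is exactly the role they play in types.py.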
@@ -3,12 +3,11 @@ from typing import Optional, Literal, List, Dict, Tuple
import re

from abc import ABC, abstractmethod
from fake_useragent import UserAgent
import requests
from lxml import html
import json
from typing import Union


class UAGen(ABC):
    @abstractmethod
@@ -1,5 +1,4 @@
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup, Comment, element, Tag, NavigableString
import json

@@ -33,6 +32,8 @@ import hashlib
from urllib.robotparser import RobotFileParser
import aiohttp
from urllib.parse import urlparse, urlunparse
from functools import lru_cache

from packaging import version
from . import __version__
@@ -1962,6 +1963,82 @@ def normalize_url(href, base_url):
    return normalized


def normalize_url_for_deep_crawl(href, base_url):
    """Normalize URLs to ensure consistent format"""
    from urllib.parse import urljoin, urlparse, urlunparse, parse_qs, urlencode

    # Handle None or empty values
    if not href:
        return None

    # Use urljoin to handle relative URLs
    full_url = urljoin(base_url, href.strip())

    # Parse the URL for normalization
    parsed = urlparse(full_url)

    # Convert hostname to lowercase
    netloc = parsed.netloc.lower()

    # Remove fragment entirely
    fragment = ''

    # Normalize query parameters if needed
    query = parsed.query
    if query:
        # Parse query parameters
        params = parse_qs(query)

        # Remove tracking parameters (example - customize as needed)
        tracking_params = ['utm_source', 'utm_medium', 'utm_campaign', 'ref', 'fbclid']
        for param in tracking_params:
            if param in params:
                del params[param]

        # Rebuild query string, sorted for consistency
        query = urlencode(params, doseq=True) if params else ''

    # Build normalized URL
    normalized = urlunparse((
        parsed.scheme,
        netloc,
        parsed.path.rstrip('/') or '/',  # Normalize trailing slash
        parsed.params,
        query,
        fragment
    ))

    return normalized


@lru_cache(maxsize=10000)
def efficient_normalize_url_for_deep_crawl(href, base_url):
    """Efficient URL normalization with proper parsing"""
    from urllib.parse import urljoin

    if not href:
        return None

    # Resolve relative URLs
    full_url = urljoin(base_url, href.strip())

    # Use proper URL parsing
    parsed = urlparse(full_url)

    # Only perform the most critical normalizations:
    # 1. Lowercase hostname
    # 2. Remove fragment
    normalized = urlunparse((
        parsed.scheme,
        parsed.netloc.lower(),
        parsed.path,
        parsed.params,
        parsed.query,
        ''  # Remove fragment
    ))

    return normalized


def normalize_url_tmp(href, base_url):
    """Normalize URLs to ensure consistent format"""
    # Extract protocol and domain from base URL
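The tracking-parameter stripping in `normalize_url_for_deep_crawl` above can be exercised standalone with `urllib.parse`. This sketch mirrors the same steps (resolve relative URL, lowercase host, drop the fragment, drop `utm_*`-style params, normalize the trailing slash); the URLs are made-up examples:

```python
from urllib.parse import urljoin, urlparse, urlunparse, parse_qs, urlencode

TRACKING_PARAMS = {'utm_source', 'utm_medium', 'utm_campaign', 'ref', 'fbclid'}

def normalize(href: str, base_url: str) -> str:
    """Mirror of normalize_url_for_deep_crawl's normalization steps."""
    parsed = urlparse(urljoin(base_url, href.strip()))
    # Keep only non-tracking query parameters
    params = {k: v for k, v in parse_qs(parsed.query).items()
              if k not in TRACKING_PARAMS}
    return urlunparse((
        parsed.scheme,
        parsed.netloc.lower(),          # lowercase hostname
        parsed.path.rstrip('/') or '/', # normalize trailing slash
        parsed.params,
        urlencode(params, doseq=True) if params else '',
        ''                              # fragment removed
    ))

print(normalize("/Docs/?utm_source=x&page=2#top", "https://Example.COM/base/"))
# https://example.com/Docs?page=2
```

Deduplicating on this normalized form is what lets the deep-crawl frontier treat `page?utm_source=a` and `page?utm_source=b` as the same URL.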
@@ -352,7 +352,10 @@ Example:
from crawl4ai import CrawlerRunConfig, PruningContentFilter

config = CrawlerRunConfig(
    markdown_generator=DefaultMarkdownGenerator(
        content_filter=PruningContentFilter(threshold=0.48, threshold_type="fixed")
    ),
    cache_mode=CacheMode.BYPASS
)
print(config.dump())  # Use this JSON in your API calls
```
@@ -595,8 +598,8 @@ curl http://localhost:8000/health
## Complete Examples

Check out the `examples` folder in our repository for full working examples! Here are two to get you started:
[Using Client SDK](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_sdk.py)
[Using REST API](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_rest_api.py)

## Server Configuration
@@ -18,7 +18,8 @@ from crawl4ai import (
    CacheMode,
    BrowserConfig,
    MemoryAdaptiveDispatcher,
    RateLimiter,
    LLMConfig
)
from crawl4ai.utils import perform_completion_with_backoff
from crawl4ai.content_filter_strategy import (
@@ -103,8 +104,10 @@ async def process_llm_extraction(
    else:
        api_key = os.environ.get(config["llm"].get("api_key_env", None), "")
    llm_strategy = LLMExtractionStrategy(
        llm_config=LLMConfig(
            provider=config["llm"]["provider"],
            api_token=api_key
        ),
        instruction=instruction,
        schema=json.loads(schema) if schema else None,
    )
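The hunk above replaces separate provider/api_token keyword arguments with a single LLMConfig object. A dependency-free sketch of that refactor shape (these dataclasses are stand-ins, not crawl4ai's real classes):

```python
# Generic sketch of the refactor: bundle provider/api_token into one config
# object. Names mirror crawl4ai's API, but these classes are illustrative toys.
from dataclasses import dataclass

@dataclass
class LLMConfig:
    provider: str
    api_token: str = ""

@dataclass
class LLMExtractionStrategy:
    llm_config: LLMConfig
    instruction: str = ""

strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(provider="openai/gpt-4o-mini", api_token="sk-..."),
    instruction="Extract main content",
)
print(strategy.llm_config.provider)  # → openai/gpt-4o-mini
```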
@@ -164,8 +167,10 @@ async def handle_markdown_request(
    FilterType.FIT: PruningContentFilter(),
    FilterType.BM25: BM25ContentFilter(user_query=query or ""),
    FilterType.LLM: LLMContentFilter(
        llm_config=LLMConfig(
            provider=config["llm"]["provider"],
            api_token=os.environ.get(config["llm"].get("api_key_env", None), ""),
        ),
        instruction=query or "Extract main content"
    )
}[filter_type]
|
|||||||
@@ -10,7 +10,7 @@ from pydantic.main import BaseModel
|
|||||||
import base64
|
import base64
|
||||||
|
|
||||||
instance = JWT()
|
instance = JWT()
|
||||||
security = HTTPBearer()
|
security = HTTPBearer(auto_error=False)
|
||||||
SECRET_KEY = os.environ.get("SECRET_KEY", "mysecret")
|
SECRET_KEY = os.environ.get("SECRET_KEY", "mysecret")
|
||||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
||||||
|
|
||||||
@@ -30,6 +30,9 @@ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -

def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> Dict:
    """Verify the JWT token from the Authorization header."""
    if credentials is None:
        return None
    token = credentials.credentials
    verifying_key = get_jwk_from_secret(SECRET_KEY)
    try:
@@ -38,9 +41,15 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security))
    except Exception:
        raise HTTPException(status_code=401, detail="Invalid or expired token")


def get_token_dependency(config: Dict):
    """Return the token dependency if JWT is enabled, else a function that returns None."""
    if config.get("security", {}).get("jwt_enabled", False):
        return verify_token
    else:
        return lambda: None


class TokenRequest(BaseModel):
    email: EmailStr
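The change above returns a callable in both branches, so the dependency can always be invoked uniformly instead of sometimes being `None`. A framework-free sketch of that pattern (the toy `verify_token` here just wraps the credential; names mirror the diff but the body is illustrative):

```python
# Standalone sketch of the optional-auth pattern: when auth is disabled, the
# factory returns a callable yielding None rather than returning None itself.
from typing import Callable, Dict, Optional

def get_token_dependency(config: Dict) -> Callable[..., Optional[dict]]:
    def verify_token(credentials: Optional[str] = None) -> Optional[dict]:
        if credentials is None:
            return None          # anonymous access allowed
        return {"token": credentials}
    if config.get("security", {}).get("jwt_enabled", False):
        return verify_token
    else:
        return lambda: None

dep = get_token_dependency({"security": {"jwt_enabled": False}})
print(dep())  # → None
dep = get_token_dependency({"security": {"jwt_enabled": True}})
print(dep("abc")["token"])  # → abc
```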
@@ -3,7 +3,7 @@ app:
  title: "Crawl4AI API"
  version: "1.0.0"
  host: "0.0.0.0"
  port: 8020
  reload: True
  timeout_keep_alive: 300

@@ -38,8 +38,8 @@ rate_limiting:

# Security Configuration
security:
  enabled: false
  jwt_enabled: false
  https_redirect: false
  trusted_hosts: ["*"]
  headers:
230	docs/examples/crypto_analysis_example.py	Normal file
@@ -0,0 +1,230 @@
"""
Crawl4AI Crypto Trading Analysis Demo
Author: Unclecode
Date: 2024-03-15

This script demonstrates advanced crypto market analysis using:
1. Web scraping of real-time CoinMarketCap data
2. Smart table extraction with layout detection
3. Hedge fund-grade financial metrics
4. Interactive visualizations for trading signals

Key Features:
- Volume Anomaly Detection: Finds unusual trading activity
- Liquidity Power Score: Identifies easily tradable assets
- Volatility-Weighted Momentum: Surfaces sustainable trends
- Smart Money Signals: Algorithmic buy/hold recommendations
"""

import asyncio
import pandas as pd
import plotly.express as px
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LXMLWebScrapingStrategy
from crawl4ai import CrawlResult
from typing import List
from IPython.display import HTML

class CryptoAlphaGenerator:
    """
    Advanced crypto analysis engine that transforms raw web data into:
    - Volume anomaly flags
    - Liquidity scores
    - Momentum-risk ratios
    - Machine learning-inspired trading signals

    Methods:
        analyze_tables(): Process raw tables into trading insights
        create_visuals(): Generate institutional-grade visualizations
        generate_insights(): Create plain English trading recommendations
    """

    def clean_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Convert crypto market data to machine-readable format.
        Handles currency symbols, units (B=Billions), and percentage values.
        """
        # Clean numeric columns
        df['Price'] = df['Price'].str.replace(r'[^\d.]', '', regex=True).astype(float)
        df['Market Cap'] = df['Market Cap'].str.extract(r'\$([\d.]+)B')[0].astype(float) * 1e9
        df['Volume(24h)'] = df['Volume(24h)'].str.extract(r'\$([\d.]+)B')[0].astype(float) * 1e9

        # Convert percentages to decimal values
        for col in ['1h %', '24h %', '7d %']:
            df[col] = df[col].str.replace('%', '').astype(float) / 100

        return df

    def calculate_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Compute advanced trading metrics used by quantitative funds:

        1. Volume/Market Cap Ratio - Measures liquidity efficiency
           (High ratio = Underestimated attention)
        2. Volatility Score - Risk-adjusted momentum potential
           (STD of 1h/24h/7d returns)
        3. Momentum Score - Weighted average of returns
           (1h:30% + 24h:50% + 7d:20%)
        4. Volume Anomaly - 3σ deviation detection
           (Flags potential insider activity)
        """
        # Liquidity Metrics
        df['Volume/Market Cap Ratio'] = df['Volume(24h)'] / df['Market Cap']

        # Risk Metrics
        df['Volatility Score'] = df[['1h %', '24h %', '7d %']].std(axis=1)

        # Momentum Metrics
        df['Momentum Score'] = (df['1h %']*0.3 + df['24h %']*0.5 + df['7d %']*0.2)

        # Anomaly Detection
        median_vol = df['Volume(24h)'].median()
        df['Volume Anomaly'] = df['Volume(24h)'] > 3 * median_vol

        # Value Flags
        df['Undervalued Flag'] = (df['Market Cap'] < 1e9) & (df['Momentum Score'] > 0.05)
        df['Liquid Giant'] = (df['Volume/Market Cap Ratio'] > 0.15) & (df['Market Cap'] > 1e9)

        return df

    def create_visuals(self, df: pd.DataFrame) -> dict:
        """
        Generate three institutional-grade visualizations:

        1. 3D Market Map - X:Size, Y:Liquidity, Z:Momentum
        2. Liquidity Tree - Color:Volume Efficiency
        3. Momentum Leaderboard - Top sustainable movers
        """
        # 3D Market Overview
        fig1 = px.scatter_3d(
            df,
            x='Market Cap',
            y='Volume/Market Cap Ratio',
            z='Momentum Score',
            size='Volatility Score',
            color='Volume Anomaly',
            hover_name='Name',
            title='Smart Money Market Map: Spot Overlooked Opportunities',
            labels={'Market Cap': 'Size (Log $)', 'Volume/Market Cap Ratio': 'Liquidity Power'},
            log_x=True,
            template='plotly_dark'
        )

        # Liquidity Efficiency Tree
        fig2 = px.treemap(
            df,
            path=['Name'],
            values='Market Cap',
            color='Volume/Market Cap Ratio',
            hover_data=['Momentum Score'],
            title='Liquidity Forest: Green = High Trading Efficiency',
            color_continuous_scale='RdYlGn'
        )

        # Momentum Leaders
        fig3 = px.bar(
            df.sort_values('Momentum Score', ascending=False).head(10),
            x='Name',
            y='Momentum Score',
            color='Volatility Score',
            title='Sustainable Momentum Leaders (Low Volatility + High Growth)',
            text='7d %',
            template='plotly_dark'
        )

        return {'market_map': fig1, 'liquidity_tree': fig2, 'momentum_leaders': fig3}

    def generate_insights(self, df: pd.DataFrame) -> str:
        """
        Create plain English trading insights explaining:
        - Volume spikes and their implications
        - Risk-reward ratios of top movers
        - Liquidity warnings for large positions
        """
        top_coin = df.sort_values('Momentum Score', ascending=False).iloc[0]
        anomaly_coins = df[df['Volume Anomaly']].sort_values('Volume(24h)', ascending=False)

        report = f"""
        🚀 Top Alpha Opportunity: {top_coin['Name']}
        - Momentum Score: {top_coin['Momentum Score']:.2%} (Top 1%)
        - Risk-Reward Ratio: {top_coin['Momentum Score']/top_coin['Volatility Score']:.1f}
        - Liquidity Warning: {'✅ Safe' if top_coin['Liquid Giant'] else '⚠️ Thin Markets'}

        🔥 Volume Spikes Detected ({len(anomaly_coins)} coins):
        {anomaly_coins[['Name', 'Volume(24h)']].head(3).to_markdown(index=False)}

        💡 Smart Money Tip: Coins with Volume/Cap > 15% and Momentum > 5%
        historically outperform by 22% weekly returns.
        """
        return report

async def main():
    """
    Main execution flow:
    1. Configure headless browser for scraping
    2. Extract live crypto market data
    3. Clean and analyze using hedge fund models
    4. Generate visualizations and insights
    5. Output professional trading report
    """
    # Configure browser with anti-detection features
    browser_config = BrowserConfig(
        headless=True,
        stealth=True,
        block_resources=["image", "media"]
    )

    # Initialize crawler with smart table detection
    crawler = AsyncWebCrawler(config=browser_config)
    await crawler.start()

    try:
        # Set up scraping parameters
        crawl_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            scraping_strategy=LXMLWebScrapingStrategy(
                table_score_threshold=8,  # Strict table detection
                keep_data_attributes=True
            )
        )

        # Execute market data extraction
        results: List[CrawlResult] = await crawler.arun(
            url='https://coinmarketcap.com/?page=1',
            config=crawl_config
        )

        # Process results
        for result in results:
            if result.success and result.media['tables']:
                # Extract primary market table
                raw_df = pd.DataFrame(
                    result.media['tables'][0]['rows'],
                    columns=result.media['tables'][0]['headers']
                )

                # Initialize analysis engine
                analyzer = CryptoAlphaGenerator()
                clean_df = analyzer.clean_data(raw_df)
                analyzed_df = analyzer.calculate_metrics(clean_df)

                # Generate outputs
                visuals = analyzer.create_visuals(analyzed_df)
                insights = analyzer.generate_insights(analyzed_df)

                # Save visualizations
                visuals['market_map'].write_html("market_map.html")
                visuals['liquidity_tree'].write_html("liquidity_tree.html")

                # Display results
                print("🔑 Key Trading Insights:")
                print(insights)
                print("\n📊 Open 'market_map.html' for interactive analysis")

    finally:
        await crawler.close()

if __name__ == "__main__":
    asyncio.run(main())
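The pandas string cleaning in `clean_data()` above boils down to two conversions; a dependency-free sketch of the same parsing (helper names are mine, not from the example):

```python
# Standalone sketch of the unit/percent parsing clean_data() performs
# (pure stdlib; the real code uses pandas string methods on whole columns).
import re

def parse_billions(text: str) -> float:
    """'$1.23B' -> 1.23e9"""
    return float(re.search(r'\$([\d.]+)B', text).group(1)) * 1e9

def parse_percent(text: str) -> float:
    """'4.5%' -> 0.045"""
    return float(text.replace('%', '')) / 100

print(parse_billions("$1.5B"))  # → 1500000000.0
print(parse_percent("4.5%"))    # → 0.045
```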
@@ -65,7 +65,6 @@ async def basic_deep_crawl():
        f"\n✅ Performance: {len(results)} pages in {time.perf_counter() - start_time:.2f} seconds"
    )


# 2️⃣ Stream vs. Non-Stream Execution
async def stream_vs_nonstream():
    """
@@ -80,7 +79,7 @@ async def stream_vs_nonstream():
    base_config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(max_depth=1, include_external=False),
        scraping_strategy=LXMLWebScrapingStrategy(),
        verbose=False,
    )

    async with AsyncWebCrawler() as crawler:
@@ -127,7 +126,6 @@ async def stream_vs_nonstream():
    print(f" ✅ All results: {time.perf_counter() - start_time:.2f} seconds")
    print("\n🔍 Key Takeaway: Streaming allows processing results immediately")


# 3️⃣ Introduce Filters & Scorers
async def filters_and_scorers():
    """
@@ -212,11 +210,11 @@ async def filters_and_scorers():

    # Create a keyword relevance scorer
    keyword_scorer = KeywordRelevanceScorer(
        keywords=["crawl", "example", "async", "configuration", "javascript", "css"], weight=1
    )

    config = CrawlerRunConfig(
        deep_crawl_strategy=BestFirstCrawlingStrategy(
            max_depth=1, include_external=False, url_scorer=keyword_scorer
        ),
        scraping_strategy=LXMLWebScrapingStrategy(),
@@ -236,11 +234,172 @@ async def filters_and_scorers():
    print(f" ✅ Crawler prioritized {len(results)} pages by relevance score")
    print(" 🔍 Note: BestFirstCrawlingStrategy visits highest-scoring pages first")


# 4️⃣ Advanced Filters
async def advanced_filters():
    """
    PART 4: Demonstrates advanced filtering techniques for specialized crawling.

    This function covers:
    - SEO filters
    - Text relevancy filtering
    - Combining advanced filters
    """
    print("\n===== ADVANCED FILTERS =====")

    async with AsyncWebCrawler() as crawler:
        # SEO FILTER EXAMPLE
        print("\n📊 EXAMPLE 1: SEO FILTERS")
        print(
            "Quantitative SEO quality assessment filter based on searching for keywords in the head section"
        )

        seo_filter = SEOFilter(
            threshold=0.5, keywords=["dynamic", "interaction", "javascript"]
        )

        config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1, filter_chain=FilterChain([seo_filter])
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=config)

        print(f" ✅ Found {len(results)} pages with relevant keywords")
        for result in results:
            print(f" → {result.url}")

        # ADVANCED TEXT RELEVANCY FILTER
        print("\n📊 EXAMPLE 2: ADVANCED TEXT RELEVANCY FILTER")

        # More sophisticated content relevance filter
        relevance_filter = ContentRelevanceFilter(
            query="Interact with the web using your authentic digital identity",
            threshold=0.7,
        )

        config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1, filter_chain=FilterChain([relevance_filter])
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=config)

        print(f" ✅ Found {len(results)} pages")
        for result in results:
            relevance_score = result.metadata.get("relevance_score", 0)
            print(f" → Score: {relevance_score:.2f} | {result.url}")


# 5️⃣ Max Pages and Score Thresholds
async def max_pages_and_thresholds():
    """
    PART 5: Demonstrates using max_pages and score_threshold parameters with different strategies.

    This function shows:
    - How to limit the number of pages crawled
    - How to set score thresholds for more targeted crawling
    - Comparing BFS, DFS, and Best-First strategies with these parameters
    """
    print("\n===== MAX PAGES AND SCORE THRESHOLDS =====")

    from crawl4ai.deep_crawling import DFSDeepCrawlStrategy

    async with AsyncWebCrawler() as crawler:
        # Define a common keyword scorer for all examples
        keyword_scorer = KeywordRelevanceScorer(
            keywords=["browser", "crawler", "web", "automation"],
            weight=1.0
        )

        # EXAMPLE 1: BFS WITH MAX PAGES
        print("\n📊 EXAMPLE 1: BFS STRATEGY WITH MAX PAGES LIMIT")
        print(" Limit the crawler to a maximum of 5 pages")

        bfs_config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                max_pages=5  # Only crawl 5 pages
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=bfs_config)

        print(f" ✅ Crawled exactly {len(results)} pages as specified by max_pages")
        for result in results:
            depth = result.metadata.get("depth", 0)
            print(f" → Depth: {depth} | {result.url}")

        # EXAMPLE 2: DFS WITH SCORE THRESHOLD
        print("\n📊 EXAMPLE 2: DFS STRATEGY WITH SCORE THRESHOLD")
        print(" Only crawl pages with a relevance score above 0.7")

        dfs_config = CrawlerRunConfig(
            deep_crawl_strategy=DFSDeepCrawlStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                score_threshold=0.7,  # Only process URLs with scores above 0.7
                max_pages=10
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=dfs_config)

        print(f" ✅ Crawled {len(results)} pages with scores above threshold")
        for result in results:
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f" → Depth: {depth} | Score: {score:.2f} | {result.url}")

        # EXAMPLE 3: BEST-FIRST WITH BOTH CONSTRAINTS
        print("\n📊 EXAMPLE 3: BEST-FIRST STRATEGY WITH BOTH CONSTRAINTS")
        print(" Limit to 7 pages, prioritizing the highest-scoring ones")

        bf_config = CrawlerRunConfig(
            deep_crawl_strategy=BestFirstCrawlingStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                max_pages=7,  # Limit to 7 pages total
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
            stream=True,
        )

        results = []
        async for result in await crawler.arun(url="https://docs.crawl4ai.com", config=bf_config):
            results.append(result)
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f" → Depth: {depth} | Score: {score:.2f} | {result.url}")

        print(f" ✅ Crawled {len(results)} high-value pages")
        if results:
            avg_score = sum(r.metadata.get('score', 0) for r in results) / len(results)
            print(f" ✅ Average score: {avg_score:.2f}")
            print(" 🔍 Note: BestFirstCrawlingStrategy visited highest-scoring pages first")


# 6️⃣ Wrap-Up and Key Takeaways
async def wrap_up():
    """
    PART 6: Wrap-Up and Key Takeaways

    Summarize the key concepts learned in this tutorial.
    """
@@ -308,71 +467,6 @@ async def wrap_up():
    print(f" Depth {depth}: {count} pages")


async def run_tutorial():
    """
    Executes all tutorial sections in sequence.
@@ -387,8 +481,9 @@ async def run_tutorial():
        basic_deep_crawl,
        stream_vs_nonstream,
        filters_and_scorers,
        max_pages_and_thresholds,
        advanced_filters,
        wrap_up,
    ]

    for section in tutorial_sections:
@@ -398,7 +493,6 @@ async def run_tutorial():
    print("You now have a comprehensive understanding of deep crawling with Crawl4AI.")
    print("For more information, check out https://docs.crawl4ai.com")


# Execute the tutorial when run directly
if __name__ == "__main__":
    asyncio.run(run_tutorial())
@@ -39,7 +39,7 @@ async def memory_adaptive_with_rate_limit(urls, browser_config, run_config):
    start = time.perf_counter()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        dispatcher = MemoryAdaptiveDispatcher(
            memory_threshold_percent=95.0,
            max_session_permit=10,
            rate_limiter=RateLimiter(
                base_delay=(1.0, 2.0), max_delay=30.0, max_retries=2
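The hunk above raises `memory_threshold_percent` from 70.0 to 95.0, so the dispatcher keeps admitting new crawl sessions at much higher memory pressure. A toy model of just the threshold check (the real MemoryAdaptiveDispatcher also handles session limits, rate limiting, and retries):

```python
# Toy model of memory-threshold gating (illustrative only): a new session is
# allowed while current memory usage stays below the configured threshold.
def can_start_session(memory_used_percent: float, threshold_percent: float = 95.0) -> bool:
    """Allow a new crawl session only while memory stays under the threshold."""
    return memory_used_percent < threshold_percent

print(can_start_session(80.0, threshold_percent=70.0))  # → False (old 70.0 setting)
print(can_start_session(80.0, threshold_percent=95.0))  # → True  (after the change)
```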
@@ -11,7 +11,7 @@ import asyncio
import os

from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai import LLMConfig
from crawl4ai.extraction_strategy import (
    LLMExtractionStrategy,
    JsonCssExtractionStrategy,
@@ -61,19 +61,19 @@ async def main():

    # 1. LLM Extraction with different input formats
    markdown_strategy = LLMExtractionStrategy(
        llm_config=LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")),
        instruction="Extract product information including name, price, and description",
    )

    html_strategy = LLMExtractionStrategy(
        input_format="html",
        llm_config=LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")),
        instruction="Extract product information from HTML including structured data",
    )

    fit_markdown_strategy = LLMExtractionStrategy(
        input_format="fit_markdown",
        llm_config=LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")),
        instruction="Extract product information from cleaned markdown",
    )

@@ -16,9 +16,9 @@ async def main():
     crawler_config = CrawlerRunConfig(
         cache_mode=CacheMode.BYPASS,
         markdown_generator=DefaultMarkdownGenerator(
-            content_filter=PruningContentFilter(
-                threshold=0.48, threshold_type="fixed", min_word_threshold=0
-            )
+            # content_filter=PruningContentFilter(
+            #     threshold=0.48, threshold_type="fixed", min_word_threshold=0
+            # )
         ),
     )
     result : CrawlResult = await crawler.arun(
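The hunk above comments out a `PruningContentFilter` configured with a fixed threshold. As a mental model of what that configuration means, here is a toy stand-in (the real `PruningContentFilter` scores blocks on signals like text and link density; this sketch only illustrates the `threshold_type="fixed"` / `min_word_threshold` semantics):

```python
def prune_blocks(blocks, threshold=0.48, min_word_threshold=0):
    # Fixed-threshold pruning: keep a (text, score) block only when its
    # score clears the threshold and it has at least min_word_threshold words.
    return [
        text
        for text, score in blocks
        if score >= threshold and len(text.split()) >= min_word_threshold
    ]

blocks = [
    ("Main article body with several paragraphs of useful content", 0.91),
    ("subscribe to our newsletter", 0.12),
]
print(prune_blocks(blocks))  # ['Main article body with several paragraphs of useful content']
```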
docs/examples/identity_based_browsing.py (new file, 108 lines)
@@ -0,0 +1,108 @@
+"""
+Identity-Based Browsing Example with Crawl4AI
+
+This example demonstrates how to:
+1. Create a persistent browser profile interactively
+2. List available profiles
+3. Use a saved profile for crawling authenticated sites
+4. Delete profiles when no longer needed
+
+Uses the new BrowserProfiler class for profile management.
+"""
+
+import asyncio
+from crawl4ai import AsyncWebCrawler, BrowserConfig
+from crawl4ai.browser_profiler import BrowserProfiler
+from crawl4ai.async_logger import AsyncLogger
+from colorama import Fore, Style, init
+
+# Initialize colorama
+init()
+
+# Create a shared logger instance
+logger = AsyncLogger(verbose=True)
+
+# Create a shared BrowserProfiler instance
+profiler = BrowserProfiler(logger=logger)
+
+
+async def crawl_with_profile(profile_path, url):
+    """Use a profile to crawl an authenticated page"""
+    logger.info(f"\nCrawling {Fore.CYAN}{url}{Style.RESET_ALL} using profile at {Fore.YELLOW}{profile_path}{Style.RESET_ALL}", tag="CRAWL")
+
+    # Create browser config with the profile path
+    browser_config = BrowserConfig(
+        headless=False,  # Set to False if you want to see the browser window
+        use_managed_browser=True,  # Required for persistent profiles
+        user_data_dir=profile_path
+    )
+
+    start_time = asyncio.get_event_loop().time()
+
+    # Initialize crawler with the browser config
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        # Crawl the URL - You should have access to authenticated content now
+        result = await crawler.arun(url)
+
+        elapsed_time = asyncio.get_event_loop().time() - start_time
+
+        if result.success:
+            # Use url_status method for consistent logging
+            logger.url_status(url, True, elapsed_time, tag="CRAWL")
+
+            # Print page title or some indication of success
+            title = result.metadata.get("title", "")
+            logger.success(f"Page title: {Fore.GREEN}{title}{Style.RESET_ALL}", tag="CRAWL")
+            return result
+        else:
+            # Log error status
+            logger.error_status(url, result.error_message, tag="CRAWL")
+            return None
+
+
+async def main():
+    logger.info(f"{Fore.CYAN}Identity-Based Browsing Example with Crawl4AI{Style.RESET_ALL}", tag="DEMO")
+    logger.info("This example demonstrates using profiles for authenticated browsing", tag="DEMO")
+
+    # Choose between interactive mode and automatic mode
+    mode = input(f"{Fore.CYAN}Run in [i]nteractive mode or [a]utomatic mode? (i/a): {Style.RESET_ALL}").lower()
+
+    if mode == 'i':
+        # Interactive profile management - use the interactive_manager method
+        # Pass the crawl_with_profile function as the callback for the "crawl a website" option
+        await profiler.interactive_manager(crawl_callback=crawl_with_profile)
+    else:
+        # Automatic mode - simplified example
+        profiles = profiler.list_profiles()
+
+        if not profiles:
+            # Create a new profile if none exists
+            logger.info("No profiles found. Creating a new one...", tag="DEMO")
+            profile_path = await profiler.create_profile()
+            if not profile_path:
+                logger.error("Cannot proceed without a valid profile", tag="DEMO")
+                return
+        else:
+            # Use the first (most recent) profile
+            profile_path = profiles[0]["path"]
+            logger.info(f"Using existing profile: {Fore.CYAN}{profiles[0]['name']}{Style.RESET_ALL}", tag="DEMO")
+
+        # Example: Crawl an authenticated page
+        urls_to_crawl = [
+            "https://github.com/settings/profile",  # GitHub requires login
+            # "https://twitter.com/home",  # Twitter requires login
+            # "https://www.linkedin.com/feed/",  # LinkedIn requires login
+        ]
+
+        for url in urls_to_crawl:
+            await crawl_with_profile(profile_path, url)
+
+
+if __name__ == "__main__":
+    try:
+        # Run the async main function
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        logger.warning("Example interrupted by user", tag="DEMO")
+    except Exception as e:
+        logger.error(f"Error in example: {str(e)}", tag="DEMO")
@@ -1,10 +1,11 @@
-from crawl4ai.async_configs import LlmConfig
-from crawl4ai.extraction_strategy import *
-from crawl4ai.crawler_strategy import *
+from crawl4ai import LLMConfig
+from crawl4ai import AsyncWebCrawler, LLMExtractionStrategy
 import asyncio
+import os
+import json
 from pydantic import BaseModel, Field

-url = r"https://openai.com/api/pricing/"
+url = "https://openai.com/api/pricing/"


 class OpenAIModelFee(BaseModel):
@@ -14,10 +15,6 @@ class OpenAIModelFee(BaseModel):
         ..., description="Fee for output token for the OpenAI model."
     )


-from crawl4ai import AsyncWebCrawler
-
-
-
 async def main():
     # Use AsyncWebCrawler
     async with AsyncWebCrawler() as crawler:
@@ -26,7 +23,7 @@ async def main():
             word_count_threshold=1,
             extraction_strategy=LLMExtractionStrategy(
                 # provider= "openai/gpt-4o", api_token = os.getenv('OPENAI_API_KEY'),
-                llmConfig=LlmConfig(provider="groq/llama-3.1-70b-versatile", api_token=os.getenv("GROQ_API_KEY")),
+                llm_config=LLMConfig(provider="groq/llama-3.1-70b-versatile", api_token=os.getenv("GROQ_API_KEY")),
                 schema=OpenAIModelFee.model_json_schema(),
                 extraction_type="schema",
                 instruction="From the crawled content, extract all mentioned model names along with their "
@@ -1,7 +1,7 @@
 import os
 import asyncio
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.content_filter_strategy import LLMContentFilter

 async def test_llm_filter():
@@ -23,7 +23,7 @@ async def test_llm_filter():

     # Initialize LLM filter with focused instruction
     filter = LLMContentFilter(
-        llmConfig=LlmConfig(provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY')),
+        llm_config=LLMConfig(provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY')),
         instruction="""
         Focus on extracting the core educational content about Python classes.
         Include:
@@ -43,7 +43,7 @@ async def test_llm_filter():
     )

     filter = LLMContentFilter(
-        llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
+        llm_config=LLMConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
         chunk_token_threshold=2 ** 12 * 2,  # 2048 * 2
         ignore_cache = True,
         instruction="""
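One nit in the hunk above that the commit leaves untouched: `2 ** 12 * 2` evaluates to 8192, so the inline `# 2048 * 2` comment looks stale:

```python
# Exponentiation binds tighter than multiplication, so this is (2 ** 12) * 2:
chunk_token_threshold = 2 ** 12 * 2
print(chunk_token_threshold)  # 8192
```

Either the comment should read `# 4096 * 2` or the expression was meant to be `2 ** 11 * 2`; the diff does not say which.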
@@ -1,6 +1,6 @@
 import os, sys

-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig

 sys.path.append(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -211,7 +211,7 @@ async def extract_structured_data_using_llm(
         word_count_threshold=1,
         page_timeout=80000,
         extraction_strategy=LLMExtractionStrategy(
-            llmConfig=LlmConfig(provider=provider,api_token=api_token),
+            llm_config=LLMConfig(provider=provider,api_token=api_token),
             schema=OpenAIModelFee.model_json_schema(),
             extraction_type="schema",
             instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.
@@ -416,6 +416,7 @@ async def crawl_dynamic_content_pages_method_2():


 async def cosine_similarity_extraction():
+    from crawl4ai.extraction_strategy import CosineStrategy
     crawl_config = CrawlerRunConfig(
         cache_mode=CacheMode.BYPASS,
         extraction_strategy=CosineStrategy(
@@ -507,6 +508,9 @@ async def ssl_certification():
     if result.success and result.ssl_certificate:
         cert = result.ssl_certificate

+        tmp_dir = os.path.join(__location__, "tmp")
+        os.makedirs(tmp_dir, exist_ok=True)
+
         # 1. Access certificate properties directly
         print("\nCertificate Information:")
         print(f"Issuer: {cert.issuer.get('CN', '')}")
@@ -529,67 +533,6 @@ async def ssl_certification():
         print(f"- DER: {os.path.join(tmp_dir, 'certificate.der')}")


-# Speed Comparison
-async def speed_comparison():
-    print("\n--- Speed Comparison ---")
-
-    # Firecrawl comparison
-    from firecrawl import FirecrawlApp
-
-    app = FirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])
-    start = time.time()
-    scrape_status = app.scrape_url(
-        "https://www.nbcnews.com/business", params={"formats": ["markdown", "html"]}
-    )
-    end = time.time()
-    print("Firecrawl:")
-    print(f"Time taken: {end - start:.2f} seconds")
-    print(f"Content length: {len(scrape_status['markdown'])} characters")
-    print(f"Images found: {scrape_status['markdown'].count('cldnry.s-nbcnews.com')}")
-    print()
-
-    # Crawl4AI comparisons
-    browser_config = BrowserConfig(headless=True)
-
-    # Simple crawl
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        start = time.time()
-        result = await crawler.arun(
-            url="https://www.nbcnews.com/business",
-            config=CrawlerRunConfig(
-                cache_mode=CacheMode.BYPASS, word_count_threshold=0
-            ),
-        )
-        end = time.time()
-        print("Crawl4AI (simple crawl):")
-        print(f"Time taken: {end - start:.2f} seconds")
-        print(f"Content length: {len(result.markdown)} characters")
-        print(f"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}")
-        print()
-
-        # Advanced filtering
-        start = time.time()
-        result = await crawler.arun(
-            url="https://www.nbcnews.com/business",
-            config=CrawlerRunConfig(
-                cache_mode=CacheMode.BYPASS,
-                word_count_threshold=0,
-                markdown_generator=DefaultMarkdownGenerator(
-                    content_filter=PruningContentFilter(
-                        threshold=0.48, threshold_type="fixed", min_word_threshold=0
-                    )
-                ),
-            ),
-        )
-        end = time.time()
-        print("Crawl4AI (Markdown Plus):")
-        print(f"Time taken: {end - start:.2f} seconds")
-        print(f"Content length: {len(result.markdown.raw_markdown)} characters")
-        print(f"Fit Markdown: {len(result.markdown.fit_markdown)} characters")
-        print(f"Images found: {result.markdown.raw_markdown.count('cldnry.s-nbcnews.com')}")
-        print()
-
-
 # Main execution
 async def main():
     # Basic examples
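A side note on the speed-comparison code removed above: it measured elapsed time with `time.time()`, which can jump when the system clock is adjusted. For interval timing, `time.perf_counter()` is the more robust choice; a minimal helper sketch (not part of the codebase):

```python
import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    # perf_counter() is a monotonic clock, so the difference is immune to
    # system clock adjustments that can skew time.time() deltas
    start = time.perf_counter()
    yield
    print(f"{label}: {time.perf_counter() - start:.2f} seconds")

with timed("example sleep"):
    time.sleep(0.01)
```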
@@ -1,6 +1,6 @@
 import os, sys

-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig

 # append parent directory to system path
 sys.path.append(
@@ -147,7 +147,7 @@ async def extract_structured_data_using_llm(
         url="https://openai.com/api/pricing/",
         word_count_threshold=1,
         extraction_strategy=LLMExtractionStrategy(
-            llmConfig=LlmConfig(provider=provider,api_token=api_token),
+            llm_config=LLMConfig(provider=provider,api_token=api_token),
             schema=OpenAIModelFee.model_json_schema(),
             extraction_type="schema",
             instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.
@@ -570,7 +570,7 @@ async def generate_knowledge_graph():
     relationships: List[Relationship]

     extraction_strategy = LLMExtractionStrategy(
-        llmConfig=LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")), # In case of Ollama just pass "no-token"
+        llm_config=LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")), # In case of Ollama just pass "no-token"
         schema=KnowledgeGraph.model_json_schema(),
         extraction_type="schema",
         instruction="""Extract entities and relationships from the given text.""",
@@ -1,6 +1,6 @@
 import os
 import time
-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.web_crawler import WebCrawler
 from crawl4ai.chunking_strategy import *
 from crawl4ai.extraction_strategy import *
@@ -179,7 +179,7 @@ def add_llm_extraction_strategy(crawler):
     result = crawler.run(
         url="https://www.nbcnews.com/business",
         extraction_strategy=LLMExtractionStrategy(
-            llmConfig = LlmConfig(provider="openai/gpt-4o", api_token=os.getenv("OPENAI_API_KEY"))
+            llm_config = LLMConfig(provider="openai/gpt-4o", api_token=os.getenv("OPENAI_API_KEY"))
         ),
     )
     cprint(
@@ -198,7 +198,7 @@ def add_llm_extraction_strategy(crawler):
     result = crawler.run(
         url="https://www.nbcnews.com/business",
         extraction_strategy=LLMExtractionStrategy(
-            llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv("OPENAI_API_KEY")),
+            llm_config=LLMConfig(provider="openai/gpt-4o",api_token=os.getenv("OPENAI_API_KEY")),
             instruction="I am interested in only financial news",
         ),
     )
@@ -210,7 +210,7 @@ def add_llm_extraction_strategy(crawler):
     result = crawler.run(
         url="https://www.nbcnews.com/business",
         extraction_strategy=LLMExtractionStrategy(
-            llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv("OPENAI_API_KEY")),
+            llm_config=LLMConfig(provider="openai/gpt-4o",api_token=os.getenv("OPENAI_API_KEY")),
             instruction="Extract only content related to technology",
         ),
     )
@@ -13,11 +13,11 @@ from crawl4ai.deep_crawling import (
 )
 from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer
 from crawl4ai.async_crawler_strategy import AsyncHTTPCrawlerStrategy
-from crawl4ai.configs import ProxyConfig
+from crawl4ai.proxy_strategy import ProxyConfig
 from crawl4ai import RoundRobinProxyStrategy
 from crawl4ai.content_filter_strategy import LLMContentFilter
 from crawl4ai import DefaultMarkdownGenerator
-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
 from crawl4ai.processors.pdf import PDFCrawlerStrategy, PDFContentScrapingStrategy
 from pprint import pprint
@@ -284,9 +284,9 @@ async def llm_content_filter():
     PART 5: LLM Content Filter

     This function demonstrates:
-    - Configuring LLM providers via LlmConfig
+    - Configuring LLM providers via LLMConfig
     - Using LLM to generate focused markdown
-    - LlmConfig for configuration
+    - LLMConfig for configuration

     Note: Requires a valid API key for the chosen LLM provider
     """
@@ -296,7 +296,7 @@ async def llm_content_filter():

     # Create LLM configuration
     # Replace with your actual API key or set as environment variable
-    llm_config = LlmConfig(
+    llm_config = LLMConfig(
         provider="gemini/gemini-1.5-pro",
         api_token="env:GEMINI_API_KEY"  # Will read from GEMINI_API_KEY environment variable
     )
@@ -309,7 +309,7 @@ async def llm_content_filter():
     # Create markdown generator with LLM filter
     markdown_generator = DefaultMarkdownGenerator(
         content_filter=LLMContentFilter(
-            llmConfig=llm_config,
+            llm_config=llm_config,
             instruction="Extract key concepts and summaries"
         )
     )
@@ -381,7 +381,7 @@ async def llm_schema_generation():
     PART 7: LLM Schema Generation

     This function demonstrates:
-    - Configuring LLM providers via LlmConfig
+    - Configuring LLM providers via LLMConfig
     - Using LLM to generate extraction schemas
     - JsonCssExtractionStrategy

@@ -406,9 +406,9 @@ async def llm_schema_generation():
         <div class="rating">4.7/5</div>
     </div>
     """
-    print("\n📊 Setting up LlmConfig...")
+    print("\n📊 Setting up LLMConfig...")
     # Create LLM configuration
-    llm_config = LlmConfig(
+    llm_config = LLMConfig(
         provider="gemini/gemini-1.5-pro",
         api_token="env:GEMINI_API_KEY"
     )
@@ -416,7 +416,7 @@ async def llm_schema_generation():
     print(" This would use the LLM to analyze HTML and create an extraction schema")
     schema = JsonCssExtractionStrategy.generate_schema(
         html=sample_html,
-        llmConfig = llm_config,
+        llm_config = llm_config,
         query="Extract product name and price"
     )
     print("\n✅ Generated Schema:")
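The `api_token="env:GEMINI_API_KEY"` value in the hunk above uses a prefix convention to defer the lookup to an environment variable. A minimal sketch of how such a convention can be resolved, purely illustrative and not Crawl4AI's actual implementation:

```python
import os

def resolve_api_token(token):
    """Resolve an api_token value: an 'env:VAR' prefix defers the lookup to
    the environment; anything else is treated as a literal token."""
    if isinstance(token, str) and token.startswith("env:"):
        return os.environ.get(token[len("env:"):])
    return token

os.environ["GEMINI_API_KEY"] = "demo-token"
print(resolve_api_token("env:GEMINI_API_KEY"))  # demo-token
print(resolve_api_token("sk-literal-value"))    # sk-literal-value
```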
@@ -167,13 +167,114 @@ async with AsyncWebCrawler() as crawler:

 ---

-## 6. Summary
+## 6. Using the BrowserProfiler Class

-- **Create** your user-data directory by launching Chrome/Chromium externally with `--user-data-dir=/some/path`.
-- **Log in** or configure sites as needed, then close the browser.
-- **Reference** that folder in `BrowserConfig(user_data_dir="...")` + `use_managed_browser=True`.
-- Enjoy **persistent** sessions that reflect your real identity.
-- If you only need quick, ephemeral automation, **Magic Mode** might suffice.
+Crawl4AI provides a dedicated `BrowserProfiler` class for managing browser profiles, making it easy to create, list, and delete profiles for identity-based browsing.
+
+### Creating and Managing Profiles with BrowserProfiler
+
+The `BrowserProfiler` class offers a comprehensive API for browser profile management:
+
+```python
+import asyncio
+from crawl4ai import BrowserProfiler
+
+async def manage_profiles():
+    # Create a profiler instance
+    profiler = BrowserProfiler()
+
+    # Create a profile interactively - opens a browser window
+    profile_path = await profiler.create_profile(
+        profile_name="my-login-profile"  # Optional: name your profile
+    )
+
+    print(f"Profile saved at: {profile_path}")
+
+    # List all available profiles
+    profiles = profiler.list_profiles()
+
+    for profile in profiles:
+        print(f"Profile: {profile['name']}")
+        print(f"  Path: {profile['path']}")
+        print(f"  Created: {profile['created']}")
+        print(f"  Browser type: {profile['type']}")
+
+    # Get a specific profile path by name
+    specific_profile = profiler.get_profile_path("my-login-profile")
+
+    # Delete a profile when no longer needed
+    success = profiler.delete_profile("old-profile-name")
+
+asyncio.run(manage_profiles())
+```
+
+**How profile creation works:**
+1. A browser window opens for you to interact with
+2. You log in to websites, set preferences, etc.
+3. When you're done, press 'q' in the terminal to close the browser
+4. The profile is saved in the Crawl4AI profiles directory
+5. You can use the returned path with `BrowserConfig.user_data_dir`
+
+### Interactive Profile Management
+
+The `BrowserProfiler` also offers an interactive management console that guides you through profile creation, listing, and deletion:
+
+```python
+import asyncio
+from crawl4ai import BrowserProfiler, AsyncWebCrawler, BrowserConfig
+
+# Define a function to use a profile for crawling
+async def crawl_with_profile(profile_path, url):
+    browser_config = BrowserConfig(
+        headless=True,
+        use_managed_browser=True,
+        user_data_dir=profile_path
+    )
+
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url)
+        return result
+
+async def main():
+    # Create a profiler instance
+    profiler = BrowserProfiler()
+
+    # Launch the interactive profile manager
+    # Passing the crawl function as a callback adds a "crawl with profile" option
+    await profiler.interactive_manager(crawl_callback=crawl_with_profile)
+
+asyncio.run(main())
+```
+
+### Legacy Methods
+
+For backward compatibility, the previous methods on `ManagedBrowser` are still available, but they delegate to the new `BrowserProfiler` class:
+
+```python
+from crawl4ai.browser_manager import ManagedBrowser
+
+# These methods still work but use BrowserProfiler internally
+profiles = ManagedBrowser.list_profiles()
+```
+
+### Complete Example
+
+See the full example in `docs/examples/identity_based_browsing.py` for a complete demonstration of creating and using profiles for authenticated browsing using the new `BrowserProfiler` class.
+
+---
+
+## 7. Summary
+
+- **Create** your user-data directory either:
+  - By launching Chrome/Chromium externally with `--user-data-dir=/some/path`
+  - Or by using the built-in `BrowserProfiler.create_profile()` method
+  - Or through the interactive interface with `profiler.interactive_manager()`
+- **Log in** or configure sites as needed, then close the browser
+- **Reference** that folder in `BrowserConfig(user_data_dir="...")` + `use_managed_browser=True`
+- **List and reuse** profiles with `BrowserProfiler.list_profiles()`
+- **Manage** your profiles with the dedicated `BrowserProfiler` class
+- Enjoy **persistent** sessions that reflect your real identity
+- If you only need quick, ephemeral automation, **Magic Mode** might suffice

 **Recommended**: Always prefer a **Managed Browser** for robust, identity-based crawling and simpler interactions with complex sites. Use **Magic Mode** for quick tasks or prototypes where persistent data is unnecessary.

@@ -71,7 +71,8 @@ We group them by category.
|
|||||||
| **`word_count_threshold`** | `int` (default: ~200) | Skips text blocks below X words. Helps ignore trivial sections. |
|
| **`word_count_threshold`** | `int` (default: ~200) | Skips text blocks below X words. Helps ignore trivial sections. |
|
||||||
| **`extraction_strategy`** | `ExtractionStrategy` (default: None) | If set, extracts structured data (CSS-based, LLM-based, etc.). |
|
| **`extraction_strategy`** | `ExtractionStrategy` (default: None) | If set, extracts structured data (CSS-based, LLM-based, etc.). |
|
||||||
| **`markdown_generator`** | `MarkdownGenerationStrategy` (None) | If you want specialized markdown output (citations, filtering, chunking, etc.). |
|
| **`markdown_generator`** | `MarkdownGenerationStrategy` (None) | If you want specialized markdown output (citations, filtering, chunking, etc.). |
|
||||||
| **`css_selector`** | `str` (None) | Retains only the part of the page matching this selector. |
|
| **`css_selector`** | `str` (None) | Retains only the part of the page matching this selector. Affects the entire extraction process. |
|
||||||
|
| **`target_elements`** | `List[str]` (None) | List of CSS selectors for elements to focus on for markdown generation and data extraction, while still processing the entire page for links, media, etc. Provides more flexibility than `css_selector`. |
|
||||||
| **`excluded_tags`** | `list` (None) | Removes entire tags (e.g. `["script", "style"]`). |
|
| **`excluded_tags`** | `list` (None) | Removes entire tags (e.g. `["script", "style"]`). |
|
||||||
| **`excluded_selector`** | `str` (None) | Like `css_selector` but to exclude. E.g. `"#ads, .tracker"`. |
|
| **`excluded_selector`** | `str` (None) | Like `css_selector` but to exclude. E.g. `"#ads, .tracker"`. |
|
||||||
| **`only_text`** | `bool` (False) | If `True`, tries to extract text-only content. |
|
| **`only_text`** | `bool` (False) | If `True`, tries to extract text-only content. |
|
||||||
@@ -246,8 +247,8 @@ run_config = CrawlerRunConfig(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
# 3. **LLMConfig** - Setting up LLM providers

LLMConfig is useful for passing LLM provider config to strategies and functions that rely on LLMs for extraction, filtering, schema generation, etc. Currently it can be used in the following:

1. LLMExtractionStrategy
2. LLMContentFilter

## 3.2 Example Usage

```python
import os
from crawl4ai import LLMConfig

llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
```

## 4. Putting It All Together
- **Use** `BrowserConfig` for **global** browser settings: engine, headless, proxy, user agent.
- **Use** `CrawlerRunConfig` for each crawl’s **context**: how to filter content, handle caching, wait for dynamic elements, or run JS.
- **Pass** both configs to `AsyncWebCrawler` (the `BrowserConfig`) and then to `arun()` (the `CrawlerRunConfig`).
- **Use** `LLMConfig` for LLM provider configurations that can be reused across all extraction, filtering, and schema generation tasks. It can be used in `LLMExtractionStrategy`, `LLMContentFilter`, `JsonCssExtractionStrategy.generate_schema`, and `JsonXPathExtractionStrategy.generate_schema`.

```python
# Create a modified copy with the clone() method
```
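The idea behind `clone()` can be sketched in plain Python with `dataclasses.replace`: produce a modified copy while leaving the original untouched (a toy stand-in for illustration, not the real `CrawlerRunConfig` class):

```python
from dataclasses import dataclass, replace
from typing import Optional

@dataclass(frozen=True)
class ToyRunConfig:
    # Toy stand-in for CrawlerRunConfig, for illustration only
    stream: bool = False
    word_count_threshold: int = 10
    css_selector: Optional[str] = None

base = ToyRunConfig(word_count_threshold=15)
streaming = replace(base, stream=True)  # modified copy; `base` is unchanged

print(base.stream, streaming.stream, streaming.word_count_threshold)  # False True 15
```

This copy-with-overrides pattern is why you can keep one base config per session and derive per-call variants safely.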
```python
from pydantic import BaseModel
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from crawl4ai import LLMConfig

# Define schema
class Article(BaseModel):
    ...

# Create strategy
strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(provider="ollama/llama2"),
    schema=Article.schema(),
    instruction="Extract article details"
)
```
```python
from crawl4ai.chunking_strategy import OverlappingWindowChunking
from crawl4ai import LLMConfig

# Create chunking strategy
chunker = OverlappingWindowChunking(
    ...
)

# Use with extraction strategy
strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(provider="ollama/llama2"),
    chunking_strategy=chunker
)
```
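The windowing idea itself is easy to sketch in plain Python. The following is an illustrative approximation (parameter names are assumptions, not `OverlappingWindowChunking`'s real internals): each chunk shares `overlap` words with the previous one so content at chunk borders is not lost.

```python
def overlapping_windows(words, window_size=8, overlap=2):
    # Each window starts `window_size - overlap` words after the previous one
    step = window_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(words[start:start + window_size])
        if start + window_size >= len(words):
            break  # last window already reaches the end
    return chunks

words = [f"w{i}" for i in range(20)]
chunks = overlapping_windows(words)
print(len(chunks))                    # 3
print(chunks[0][-2:], chunks[1][:2])  # the 2-word overlap between chunks
```

Overlap trades a little duplicated text for better continuity, which usually helps LLM extraction across chunk boundaries.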
* **Multiple Crawler Strategies:** Choose between the full-featured Playwright browser-based crawler or a new, *much* faster HTTP-only crawler for simpler tasks.
* **Docker Deployment:** Deploy Crawl4AI as a scalable, self-contained service with built-in API endpoints and optional JWT authentication.
* **Command-Line Interface (CLI):** Interact with Crawl4AI directly from your terminal. Crawl, configure, and extract data with simple commands.
* **LLM Configuration (`LLMConfig`):** A new, unified way to configure LLM providers (OpenAI, Anthropic, Ollama, etc.) for extraction, filtering, and schema generation. Simplifies API key management and switching between models.

**Minor Updates & Improvements:**

* **Config**: `FastFilterChain` has been replaced with `FilterChain`
* **Deep-Crawl**: `DeepCrawlStrategy.arun` now returns `Union[CrawlResultT, List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]`
* **Proxy**: Removed synchronous `WebCrawler` support and related rate-limiting configurations
* **LLM Parameters:** Use the new `LLMConfig` object instead of passing `provider`, `api_token`, `base_url`, and `api_base` directly to `LLMExtractionStrategy` and `LLMContentFilter`.

**In short:** Update imports, adjust `arun_many()` usage, check for optional fields, and review the Docker deployment guide.
```python
from crawl4ai import (
    RoundRobinProxyStrategy,
)
import asyncio
from crawl4ai.proxy_strategy import ProxyConfig

async def main():
    # Load proxies and create rotation strategy
    proxies = ProxyConfig.from_env()
    ...

asyncio.run(main())
```
```python
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, DefaultMarkdownGenerator
from crawl4ai.content_filter_strategy import LLMContentFilter
from crawl4ai import LLMConfig
import asyncio

llm_config = LLMConfig(provider="gemini/gemini-1.5-pro", api_token="env:GEMINI_API_KEY")

markdown_generator = DefaultMarkdownGenerator(
    content_filter=LLMContentFilter(llm_config=llm_config, instruction="Extract key concepts and summaries")
)

config = CrawlerRunConfig(markdown_generator=markdown_generator)
```
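The `api_token="env:GEMINI_API_KEY"` form above defers the secret to an environment variable read at runtime. A rough sketch of how such a prefix convention can be resolved (an assumption for illustration, not Crawl4AI's actual code):

```python
import os

def resolve_api_token(token):
    # "env:NAME" means: look up NAME in the environment at runtime
    if token and token.startswith("env:"):
        return os.environ.get(token[len("env:"):])
    return token  # literal tokens pass through unchanged

os.environ["DEMO_KEY"] = "sk-demo"
print(resolve_api_token("env:DEMO_KEY"))  # sk-demo
print(resolve_api_token("sk-literal"))    # sk-literal
```

Keeping keys out of source files this way makes configs safe to commit and easy to swap between environments.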
```python
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import LLMConfig

llm_config = LLMConfig(provider="gemini/gemini-1.5-pro", api_token="env:GEMINI_API_KEY")

schema = JsonCssExtractionStrategy.generate_schema(
    html="<div class='product'><h2>Product Name</h2><span class='price'>$99</span></div>",
    llm_config=llm_config,
    query="Extract product name and price"
)
print(schema)
```
serialization, especially for sets of allowed/blocked domains. No code changes required.

- **Added: New `LLMConfig` parameter.** This new parameter can be passed for extraction, filtering, and schema generation tasks. It simplifies passing provider strings, API tokens, and base URLs across all sections where LLM configuration is necessary. It also enables reuse and allows for quick experimentation between different LLM configurations.

```python
from crawl4ai import LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig

# Example of using LLMConfig with LLMExtractionStrategy
llm_config = LLMConfig(provider="openai/gpt-4o", api_token="YOUR_API_KEY")
strategy = LLMExtractionStrategy(llm_config=llm_config, schema=...)

# Example usage within a crawler
async with AsyncWebCrawler() as crawler:
    ...
```

**Breaking Change:** Removed old parameters like `provider`, `api_token`, `base_url`, and `api_base` from `LLMExtractionStrategy` and `LLMContentFilter`. Users should migrate to using the `LLMConfig` object.

- **Changed: Improved browser context management and added shared data support. (Breaking Change:** `BrowserContext` API updated). Browser contexts are now
Crawl4AI’s flexibility stems from three key classes:

1. **`BrowserConfig`** – Dictates **how** the browser is launched and behaves (e.g., headless or visible, proxy, user agent).
2. **`CrawlerRunConfig`** – Dictates **how** each **crawl** operates (e.g., caching, extraction, timeouts, JavaScript code to run, etc.).
3. **`LLMConfig`** – Dictates **how** LLM providers are configured (model, API token, base URL, temperature, etc.).

In most examples, you create **one** `BrowserConfig` for the entire crawler session, then pass a **fresh** or re-used `CrawlerRunConfig` whenever you call `arun()`. This tutorial shows the most commonly used parameters. If you need advanced or rarely used fields, see the [Configuration Parameters](../api/parameters.md).
## 3. LLMConfig Essentials

### Key fields to note

- If your provider has a custom endpoint

```python
import os
from crawl4ai import LLMConfig

llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
```

## 4. Putting It All Together

In a typical scenario, you define **one** `BrowserConfig` for your crawler session, then create **one or more** `CrawlerRunConfig` & `LLMConfig` depending on each call’s needs:

```python
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

async def main():
    ...
```
```python
    # 3) Example LLM content filtering
    gemini_config = LLMConfig(
        provider="gemini/gemini-1.5-pro",
        api_token="env:GEMINI_API_TOKEN"
    )

    # Initialize LLM filter with specific instruction
    filter = LLMContentFilter(
        llm_config=gemini_config,  # or your preferred provider
        instruction="""
        Focus on extracting the core educational content.
        Include:
        ...
        """
    )
```
For a **detailed list** of available parameters (including advanced ones), see:

- [BrowserConfig, CrawlerRunConfig & LLMConfig Reference](../api/parameters.md)

You can explore topics like:

## 6. Conclusion

**BrowserConfig**, **CrawlerRunConfig**, and **LLMConfig** give you straightforward ways to define:

- **Which** browser to launch, how it should run, and any proxy or user agent needs.
- **How** each crawl should behave—caching, timeouts, JavaScript code, extraction strategies, etc.
## 1. CSS-Based Selection

There are two ways to select content from a page: using `css_selector` or the more flexible `target_elements`.

### 1.1 Using `css_selector`

A straightforward way to **limit** your crawl results to a certain region of the page is **`css_selector`** in **`CrawlerRunConfig`**:

```python
...
```

**Result**: Only elements matching that selector remain in `result.cleaned_html`.

### 1.2 Using `target_elements`

The `target_elements` parameter provides more flexibility by allowing you to target **multiple elements** for content extraction while preserving the entire page context for other features:

```python
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig

async def main():
    config = CrawlerRunConfig(
        # Target article body and sidebar, but not other content
        target_elements=["article.main-content", "aside.sidebar"]
    )

    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://example.com/blog-post",
            config=config
        )
        print("Markdown focused on target elements")
        print("Links from entire page still available:", len(result.links.get("internal", [])))

if __name__ == "__main__":
    asyncio.run(main())
```

**Key difference**: With `target_elements`, the markdown generation and structural data extraction focus on those elements, but other page elements (like links, images, and tables) are still extracted from the entire page. This gives you fine-grained control over what appears in your markdown content while preserving full page context for link analysis and media collection.
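The split described above (scoped text, full-page links) can be sketched with the stdlib `html.parser`. This is a simplified illustration of the idea, not Crawl4AI's implementation, and it only matches target elements by `id`:

```python
from html.parser import HTMLParser

class TargetedExtractor(HTMLParser):
    """Collects text only inside target elements, but links from the whole page."""
    def __init__(self, target_ids):
        super().__init__()
        self.target_ids = set(target_ids)
        self.depth = 0          # nesting depth inside a target subtree
        self.text, self.links = [], []

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == "a" and "href" in attrs:
            self.links.append(attrs["href"])  # link analysis sees the whole page
        if self.depth or attrs.get("id") in self.target_ids:
            self.depth += 1

    def handle_endtag(self, tag):
        if self.depth:
            self.depth -= 1

    def handle_data(self, data):
        if self.depth and data.strip():
            self.text.append(data.strip())    # text output stays scoped

html = '<nav><a href="/home">Home</a></nav><div id="main">Hello <a href="/next">next</a></div>'
p = TargetedExtractor({"main"})
p.feed(html)
print(p.text)   # ['Hello', 'next']
print(p.links)  # ['/home', '/next']
```

Note how the nav link is absent from the text but still appears in the link list, mirroring the `target_elements` behavior.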
---

## 2. Content Filtering & Exclusions
```python
import asyncio
import json
from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

class ArticleData(BaseModel):
    ...

async def main():
    llm_strategy = LLMExtractionStrategy(
        llm_config=LLMConfig(provider="openai/gpt-4", api_token="sk-YOUR_API_KEY"),
        schema=ArticleData.schema(),
        extraction_type="schema",
        instruction="Extract 'headline' and a short 'summary' from the content."
    )
```
---

## 7. Combining CSS Selection Methods

You can combine `css_selector` and `target_elements` in powerful ways to achieve fine-grained control over your output:

```python
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode

async def main():
    # Target specific content but preserve page context
    config = CrawlerRunConfig(
        # Focus markdown on main content and sidebar
        target_elements=["#main-content", ".sidebar"],

        # Global filters applied to entire page
        excluded_tags=["nav", "footer", "header"],
        exclude_external_links=True,

        # Use basic content thresholds
        word_count_threshold=15,

        cache_mode=CacheMode.BYPASS
    )

    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://example.com/article",
            config=config
        )

        print("Content focuses on specific elements, but all links are still analyzed")
        print(f"Internal links: {len(result.links.get('internal', []))}")
        print(f"External links: {len(result.links.get('external', []))}")

if __name__ == "__main__":
    asyncio.run(main())
```

This approach gives you the best of both worlds:

- Markdown generation and content extraction focus on the elements you care about
- Links, images, and other page data still give you the full context of the page
- Content filtering still applies globally

## 8. Conclusion

By mixing **target_elements** or **css_selector** scoping, **content filtering** parameters, and advanced **extraction strategies**, you can precisely **choose** which data to keep. Key parameters in **`CrawlerRunConfig`** for content selection include:

1. **`target_elements`** – Array of CSS selectors to focus markdown generation and data extraction, while preserving full page context for links and media.
2. **`css_selector`** – Basic scoping to an element or region for all extraction processes.
3. **`word_count_threshold`** – Skip short blocks.
4. **`excluded_tags`** – Remove entire HTML tags.
5. **`exclude_external_links`**, **`exclude_social_media_links`**, **`exclude_domains`** – Filter out unwanted links or domains.
6. **`exclude_external_images`** – Remove images from external sources.
7. **`process_iframes`** – Merge iframe content if needed.

Combine these with structured extraction (CSS, LLM-based, or others) to build powerful crawls that yield exactly the content you want, from raw or cleaned HTML up to sophisticated JSON structures. For more detail, see [Configuration Reference](../api/parameters.md). Enjoy curating your data to the max!
```python
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy

strategy = BFSDeepCrawlStrategy(
    max_depth=2,             # Crawl initial page + 2 levels deep
    include_external=False,  # Stay within the same domain
    max_pages=50,            # Maximum number of pages to crawl (optional)
    score_threshold=0.3,     # Minimum score for URLs to be crawled (optional)
)
```

**Key parameters:**

- **`max_depth`**: Number of levels to crawl beyond the starting page
- **`include_external`**: Whether to follow links to other domains
- **`max_pages`**: Maximum number of pages to crawl (default: infinite)
- **`score_threshold`**: Minimum score for URLs to be crawled (default: -inf)
- **`filter_chain`**: `FilterChain` instance for URL filtering
- **`url_scorer`**: Scorer instance for evaluating URLs
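Conceptually, the way `max_depth`, `max_pages`, and `score_threshold` interact can be sketched with a plain BFS frontier over a toy link graph (an illustration of the algorithm, not Crawl4AI's actual code):

```python
from collections import deque

def bfs_crawl(graph, start, max_depth=2, max_pages=None,
              score_threshold=float("-inf"), scorer=None):
    """Toy BFS frontier showing how the three limits interact.
    `graph` maps a URL to the URLs it links to; `scorer` rates a URL."""
    scorer = scorer or (lambda url: 1.0)
    visited, order = set(), []
    queue = deque([(start, 0)])
    while queue:
        if max_pages is not None and len(order) >= max_pages:
            break  # hard cap on total pages, regardless of depth
        url, depth = queue.popleft()
        if url in visited:
            continue
        visited.add(url)
        order.append(url)
        if depth < max_depth:
            for link in graph.get(url, []):
                if link not in visited and scorer(link) >= score_threshold:
                    queue.append((link, depth + 1))  # low scores never enqueue
    return order

graph = {"a": ["b", "c"], "b": ["d"], "c": ["spam"]}
print(bfs_crawl(graph, "a", max_depth=2, max_pages=3,
                scorer=lambda u: 0.0 if u == "spam" else 1.0,
                score_threshold=0.5))  # ['a', 'b', 'c']
```

`score_threshold` prunes links before they enter the queue, while `max_pages` stops the whole crawl once the budget is spent.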
### 2.2 DFSDeepCrawlStrategy (Depth-First Search)
```python
from crawl4ai.deep_crawling import DFSDeepCrawlStrategy

strategy = DFSDeepCrawlStrategy(
    max_depth=2,             # Crawl initial page + 2 levels deep
    include_external=False,  # Stay within the same domain
    max_pages=30,            # Maximum number of pages to crawl (optional)
    score_threshold=0.5,     # Minimum score for URLs to be crawled (optional)
)
```

**Key parameters:**

- **`max_depth`**: Number of levels to crawl beyond the starting page
- **`include_external`**: Whether to follow links to other domains
- **`max_pages`**: Maximum number of pages to crawl (default: infinite)
- **`score_threshold`**: Minimum score for URLs to be crawled (default: -inf)
- **`filter_chain`**: `FilterChain` instance for URL filtering
- **`url_scorer`**: Scorer instance for evaluating URLs

### 2.3 BestFirstCrawlingStrategy (⭐️ Recommended deep crawl strategy)
```python
scorer = KeywordRelevanceScorer(
    ...
)

strategy = BestFirstCrawlingStrategy(
    max_depth=2,
    include_external=False,
    url_scorer=scorer,
    max_pages=25,  # Maximum number of pages to crawl (optional)
)
```

This crawling approach:

- Evaluates each discovered URL based on scorer criteria
- Visits higher-scoring pages first
- Helps focus crawl resources on the most relevant content
- Can limit total pages crawled with `max_pages`
- Does not need `score_threshold`, as it naturally prioritizes by score
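The prioritization idea can be sketched with a scored heap: instead of FIFO order, the frontier always pops the highest-scoring URL next (a toy illustration over an in-memory graph, not the real strategy):

```python
import heapq

def best_first_crawl(graph, start, scorer, max_pages=5):
    """Toy best-first frontier: always expand the highest-scored URL next."""
    visited, order = set(), []
    frontier = [(-scorer(start), start)]  # min-heap, so negate scores
    while frontier and len(order) < max_pages:
        neg_score, url = heapq.heappop(frontier)
        if url in visited:
            continue
        visited.add(url)
        order.append(url)
        for link in graph.get(url, []):
            if link not in visited:
                heapq.heappush(frontier, (-scorer(link), link))
    return order

graph = {"home": ["docs", "blog"], "docs": ["api"], "blog": []}
scores = {"home": 1.0, "docs": 0.9, "blog": 0.2, "api": 0.8}
print(best_first_crawl(graph, "home", scores.get))  # ['home', 'docs', 'api', 'blog']
```

Notice that `blog` is visited last even though it was discovered first: the heap ordering, not discovery order, decides what gets crawled next, which is why a separate threshold is usually unnecessary.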
---
@@ -410,27 +425,64 @@ if __name__ == "__main__":
|
|||||||
---
|
---
|
||||||
|
|
||||||
|
|
||||||
## 8. Common Pitfalls & Tips
|
## 8. Limiting and Controlling Crawl Size
|
||||||
|
|
||||||
1.**Set realistic depth limits.** Be cautious with `max_depth` values > 3, which can exponentially increase crawl size.
|
### 8.1 Using max_pages
|
||||||
|
|
||||||
|
You can limit the total number of pages crawled with the `max_pages` parameter:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Limit to exactly 20 pages regardless of depth
|
||||||
|
strategy = BFSDeepCrawlStrategy(
|
||||||
|
max_depth=3,
|
||||||
|
max_pages=20
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
This feature is useful for:
|
||||||
|
- Controlling API costs
|
||||||
|
- Setting predictable execution times
|
||||||
|
- Focusing on the most important content
|
||||||
|
- Testing crawl configurations before full execution
|
||||||
|
|
||||||
|
### 8.2 Using score_threshold
|
||||||
|
|
||||||
|
For BFS and DFS strategies, you can set a minimum score threshold to only crawl high-quality pages:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Only follow links with scores above 0.4
|
||||||
|
strategy = DFSDeepCrawlStrategy(
|
||||||
|
max_depth=2,
|
||||||
|
url_scorer=KeywordRelevanceScorer(keywords=["api", "guide", "reference"]),
|
||||||
|
score_threshold=0.4 # Skip URLs with scores below this value
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that for BestFirstCrawlingStrategy, score_threshold is not needed since pages are already processed in order of highest score first.
|
||||||
|
|
||||||
|
## 9. Common Pitfalls & Tips

1. **Set realistic limits.** Be cautious with `max_depth` values > 3, which can exponentially increase crawl size. Use `max_pages` to set hard limits.

2. **Don't neglect the scoring component.** BestFirstCrawling works best with well-tuned scorers. Experiment with keyword weights for optimal prioritization.

3. **Be a good web citizen.** Respect robots.txt (disabled by default).

4. **Handle page errors gracefully.** Not all pages will be accessible. Check `result.success` and `result.error_message` when processing results.

5. **Balance breadth vs. depth.** Choose your strategy wisely: BFS for comprehensive coverage, DFS for deep exploration, BestFirst for focused relevance-based crawling.
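For tip 4, a defensive processing loop can be sketched like this (the stand-in objects below are for illustration; real code iterates `CrawlResult` objects with the same attributes):

```python
from types import SimpleNamespace

def summarize_results(results):
    """Split crawl results into successes and failures for later inspection."""
    ok, failed = [], []
    for result in results:
        if result.success:
            ok.append(result.url)
        else:
            failed.append((result.url, result.error_message))
    return ok, failed

# Stand-in result objects for illustration only.
results = [
    SimpleNamespace(url="https://example.com/a", success=True, error_message=None),
    SimpleNamespace(url="https://example.com/b", success=False, error_message="HTTP 404"),
]
ok, failed = summarize_results(results)
print(f"{len(ok)} succeeded, {len(failed)} failed")
```

Collecting failures with their error messages, rather than raising on the first bad page, keeps long crawls running and leaves you an audit trail.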
---

## 10. Summary & Next Steps

In this **Deep Crawling with Crawl4AI** tutorial, you learned to:

- Configure **BFSDeepCrawlStrategy**, **DFSDeepCrawlStrategy**, and **BestFirstCrawlingStrategy**
- Process results in streaming or non-streaming mode
- Apply filters to target specific content
- Use scorers to prioritize the most relevant pages
- Limit crawls with the `max_pages` and `score_threshold` parameters
- Build a complete advanced crawler with combined techniques

With these tools, you can efficiently extract structured data from websites at scale, focusing precisely on the content you need for your specific use case.
@@ -133,19 +133,28 @@ This approach is handy when you still want external links but need to block cert

### 3.1 Accessing `result.media`

By default, Crawl4AI collects image, audio, and video URLs, as well as data tables, that it finds on the page. These are stored in `result.media`, a dictionary keyed by media type (e.g., `images`, `videos`, `audio`, `tables`).

**Basic Example**:
```python
if result.success:
    # Get images
    images_info = result.media.get("images", [])
    print(f"Found {len(images_info)} images in total.")
    for i, img in enumerate(images_info[:3]):  # Inspect just the first 3
        print(f"[Image {i}] URL: {img['src']}")
        print(f"    Alt text: {img.get('alt', '')}")
        print(f"    Score: {img.get('score')}")
        print(f"    Description: {img.get('desc', '')}\n")

    # Get tables
    tables = result.media.get("tables", [])
    print(f"Found {len(tables)} data tables in total.")
    for i, table in enumerate(tables):
        print(f"[Table {i}] Caption: {table.get('caption', 'No caption')}")
        print(f"    Columns: {len(table.get('headers', []))}")
        print(f"    Rows: {len(table.get('rows', []))}")
```
**Structure Example**:

@@ -171,6 +180,19 @@ result.media = {
    ],
    "audio": [
        # Similar structure but with audio-specific fields
    ],
    "tables": [
        {
            "headers": ["Name", "Age", "Location"],
            "rows": [
                ["John Doe", "34", "New York"],
                ["Jane Smith", "28", "San Francisco"],
                ["Alex Johnson", "42", "Chicago"]
            ],
            "caption": "Employee Directory",
            "summary": "Directory of company employees"
        },
        # More tables if present
    ]
}
```
@@ -199,7 +221,53 @@ crawler_cfg = CrawlerRunConfig(

This setting attempts to discard images from outside the primary domain, keeping only those from the site you’re crawling.

### 3.3 Working with Tables
Crawl4AI can detect and extract structured data from HTML tables. Tables are analyzed against several criteria to determine whether they are actual data tables (as opposed to layout tables), including:

- Presence of `thead` and `tbody` sections
- Use of `th` elements for headers
- Column consistency
- Text density
- And other factors

Tables that score above the threshold (default: 7) are extracted and stored in `result.media["tables"]`.

**Accessing Table Data**:

```python
if result.success:
    tables = result.media.get("tables", [])
    print(f"Found {len(tables)} data tables on the page")

    if tables:
        # Access the first table
        first_table = tables[0]
        print(f"Table caption: {first_table.get('caption', 'No caption')}")
        print(f"Headers: {first_table.get('headers', [])}")

        # Print the first 3 rows
        for i, row in enumerate(first_table.get('rows', [])[:3]):
            print(f"Row {i+1}: {row}")
```
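One of those criteria, column consistency, can be illustrated with a small pure-Python function (a toy heuristic for intuition, not Crawl4AI's actual scoring code):

```python
def column_consistency(rows):
    """Fraction of rows whose cell count matches the most common cell count."""
    if not rows:
        return 0.0
    counts = [len(r) for r in rows]
    mode = max(set(counts), key=counts.count)  # most common column count
    return sum(1 for c in counts if c == mode) / len(rows)

data_table = [["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]]
layout_table = [["header"], ["x", "y"], ["z"]]
print(column_consistency(data_table))    # 1.0 -> uniform columns, looks like a data table
print(column_consistency(layout_table))  # ragged columns -> more likely a layout table
```

A real data table tends to score near 1.0 on this metric, while layout tables with ragged rows score lower; combining several such signals is what pushes a table above or below the extraction threshold.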
**Configuring Table Extraction**:

You can adjust the sensitivity of the table detection algorithm with:

```python
crawler_cfg = CrawlerRunConfig(
    table_score_threshold=5  # Lower value = more tables detected (default: 7)
)
```

Each extracted table contains:

- `headers`: Column header names
- `rows`: List of rows, each containing cell values
- `caption`: Table caption text (if available)
- `summary`: Table summary attribute (if specified)
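Because each extracted table is a plain dictionary of `headers` and `rows`, reshaping it is straightforward. For example, a small sketch that zips the headers onto every row to get one record per row (the sample table below is hypothetical):

```python
def table_to_records(table):
    """Convert a {headers, rows} table dict into a list of per-row dicts."""
    headers = table.get("headers", [])
    return [dict(zip(headers, row)) for row in table.get("rows", [])]

# Hypothetical table in the shape Crawl4AI produces.
table = {
    "headers": ["Name", "Age", "Location"],
    "rows": [["John Doe", "34", "New York"], ["Jane Smith", "28", "San Francisco"]],
    "caption": "Employee Directory",
}
records = table_to_records(table)
print(records[0]["Name"])  # -> John Doe
```

From here the records drop directly into `csv.DictWriter`, a database insert, or a DataFrame constructor.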

### 3.4 Additional Media Config

- **`screenshot`**: Set to `True` if you want a full-page screenshot stored as `base64` in `result.screenshot`.
- **`pdf`**: Set to `True` if you want a PDF version of the page in `result.pdf`.

@@ -273,4 +341,11 @@ if __name__ == "__main__":

---

**That’s it for Link & Media Analysis!** You’re now equipped to filter out unwanted sites and zero in on the images and videos that matter for your project.

### Table Extraction Tips

- Not all HTML tables are extracted, only those detected as "data tables" rather than layout tables.
- Tables with inconsistent cell counts, nested tables, or those used purely for layout may be skipped.
- If tables you expect are missing, try lowering `table_score_threshold` (default is 7).

The table detection algorithm scores tables on features such as consistent columns, presence of headers, and text density. Tables scoring above the threshold are considered data tables worth extracting.
@@ -175,13 +175,13 @@ prune_filter = PruningContentFilter(

For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:

```python
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
from crawl4ai.content_filter_strategy import LLMContentFilter

async def main():
    # Initialize LLM filter with a specific instruction
    filter = LLMContentFilter(
        llm_config=LLMConfig(provider="openai/gpt-4o", api_token="your-api-token"),  # or use an environment variable
        instruction="""
        Focus on extracting the core educational content.
        Include:
@@ -128,7 +128,7 @@ Crawl4AI can also extract structured data (JSON) using CSS or XPath selectors. B

```python
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import LLMConfig

# Generate a schema (one-time cost)
html = "<div class='product'><h2>Gaming Laptop</h2><span class='price'>$999.99</span></div>"

@@ -136,13 +136,13 @@ html = "<div class='product'><h2>Gaming Laptop</h2><span class='price'>$999.99</
# Using OpenAI (requires API token)
schema = JsonCssExtractionStrategy.generate_schema(
    html,
    llm_config=LLMConfig(provider="openai/gpt-4o", api_token="your-openai-token")  # Required for OpenAI
)

# Or using Ollama (open source, no token needed)
schema = JsonCssExtractionStrategy.generate_schema(
    html,
    llm_config=LLMConfig(provider="ollama/llama3.3", api_token=None)  # Not needed for Ollama
)

# Use the schema for fast, repeated extractions
@@ -211,7 +211,7 @@ import os
import json
import asyncio
from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

class OpenAIModelFee(BaseModel):

@@ -241,7 +241,7 @@ async def extract_structured_data_using_llm(
        word_count_threshold=1,
        page_timeout=80000,
        extraction_strategy=LLMExtractionStrategy(
            llm_config=LLMConfig(provider=provider, api_token=api_token),
            schema=OpenAIModelFee.model_json_schema(),
            extraction_type="schema",
            instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.
@@ -76,7 +76,7 @@ Below is an overview of important LLM extraction parameters. All are typically s

```python
extraction_strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(provider="openai/gpt-4", api_token="YOUR_OPENAI_KEY"),
    schema=MyModel.model_json_schema(),
    extraction_type="schema",
    instruction="Extract a list of items from the text with 'name' and 'price' fields.",

@@ -101,7 +101,7 @@ import asyncio
import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

class Product(BaseModel):

@@ -111,7 +111,7 @@ class Product(BaseModel):
async def main():
    # 1. Define the LLM extraction strategy
    llm_strategy = LLMExtractionStrategy(
        llm_config=LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv('OPENAI_API_KEY')),
        schema=Product.schema_json(),  # Or use model_json_schema()
        extraction_type="schema",
        instruction="Extract all product objects with 'name' and 'price' from the content.",
@@ -415,7 +415,7 @@ The schema generator is available as a static method on both `JsonCssExtractionS

```python
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
from crawl4ai import LLMConfig

# Sample HTML with product information
html = """

@@ -435,14 +435,14 @@ html = """
css_schema = JsonCssExtractionStrategy.generate_schema(
    html,
    schema_type="css",
    llm_config=LLMConfig(provider="openai/gpt-4o", api_token="your-openai-token")
)

# Option 2: Using Ollama (open source, no token needed)
xpath_schema = JsonXPathExtractionStrategy.generate_schema(
    html,
    schema_type="xpath",
    llm_config=LLMConfig(provider="ollama/llama3.3", api_token=None)  # Not needed for Ollama
)

# Use the generated schema for fast, repeated extractions
docs/snippets/deep_crawl/1.intro.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import asyncio
from typing import List

from crawl4ai import (
    AsyncWebCrawler,
    CrawlerRunConfig,
    BFSDeepCrawlStrategy,
    CrawlResult,
    FilterChain,
    DomainFilter,
    URLPatternFilter,
)

# Import necessary classes from the crawl4ai library:
# - AsyncWebCrawler: The main class for web crawling.
# - CrawlerRunConfig: Configuration class for crawler behavior.
# - BFSDeepCrawlStrategy: Breadth-First Search deep crawling strategy.
# - CrawlResult: Data model for individual crawl results.
# - FilterChain: Used to chain multiple URL filters.
# - URLPatternFilter: Filters URLs based on patterns.
# FilterChain and URLPatternFilter can also be imported from crawl4ai.deep_crawling.filters,
# but they are re-exported in __init__.py, so we import them directly from crawl4ai here.


async def basic_deep_crawl():
    """
    Performs a basic deep crawl starting from a seed URL, demonstrating:
    - Breadth-First Search (BFS) deep crawling strategy.
    - Filtering URLs based on URL patterns.
    - Accessing crawl results and metadata.
    """

    # 1. Define URL filters:
    # Include only URLs containing "text", which are likely to contain textual content.
    url_filter = URLPatternFilter(
        patterns=[
            "*text*",  # Include URLs that contain "text" in their path or URL
        ]
    )

    # Allow only URLs from the "groq.com" domain and block URLs from "example.com".
    domain_filter = DomainFilter(
        allowed_domains=["groq.com"],
        blocked_domains=["example.com"],
    )

    # 2. Configure CrawlerRunConfig to use BFSDeepCrawlStrategy for deep crawling.
    config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(
            max_depth=2,  # Crawl at most 2 levels deep from the start URL
            max_pages=10,  # Limit the crawl to 10 pages to prevent excessive crawling
            include_external=False,  # Only crawl URLs within the same domain as the start URL
            filter_chain=FilterChain(filters=[url_filter, domain_filter]),  # Apply both filters during the deep crawl
        ),
        verbose=True,  # Enable verbose logging for detailed output during crawling
    )

    # 3. Use AsyncWebCrawler as a context manager for automatic start and close.
    async with AsyncWebCrawler() as crawler:
        results: List[CrawlResult] = await crawler.arun(
            # url="https://docs.crawl4ai.com",  # Uncomment to use the crawl4ai documentation as the start URL
            url="https://console.groq.com/docs",  # Start deep crawling from the Groq documentation
            config=config,
        )

    # 4. Print the URL and its crawl depth (from metadata) for each crawled page.
    for result in results:
        print(f"URL: {result.url}, Depth: {result.metadata.get('depth', 0)}")


if __name__ == "__main__":
    asyncio.run(basic_deep_crawl())
docs/snippets/deep_crawl/2.filters.py (new file, 162 lines)
@@ -0,0 +1,162 @@
import asyncio
from typing import List

from crawl4ai import (
    AsyncWebCrawler,
    CrawlerRunConfig,
    BFSDeepCrawlStrategy,
    CrawlResult,
    URLFilter,  # Base class for filters; not used directly here, but imported for context
    ContentTypeFilter,
    DomainFilter,
    FilterChain,
    URLPatternFilter,
    SEOFilter,  # Advanced filter; can be introduced later or as a bonus
)


async def deep_crawl_filter_tutorial_part_2():
    """
    Tutorial demonstrating URL filters in Crawl4AI, focusing on isolated filter behavior
    before integrating them into a deep crawl.

    This tutorial covers:
    - Testing individual filters with synthetic URLs.
    - Understanding filter logic and behavior in isolation.
    - Combining filters using FilterChain.
    - Integrating filters into a deep crawling example.
    """

    # === Introduction: URL Filters in Isolation ===
    print("\n" + "=" * 40)
    print("=== Introduction: URL Filters in Isolation ===")
    print("=" * 40 + "\n")
    print("In this section, we will explore each filter individually using synthetic URLs.")
    print("This allows us to understand exactly how each filter works before using them in a crawl.\n")

    # === 2. ContentTypeFilter - Testing in Isolation ===
    print("\n" + "=" * 40)
    print("=== 2. ContentTypeFilter - Testing in Isolation ===")
    print("=" * 40 + "\n")

    # 2.1. Create a ContentTypeFilter that allows only 'text/html' and 'application/json'
    # content types, BASED ON URL EXTENSIONS.
    content_type_filter = ContentTypeFilter(allowed_types=["text/html", "application/json"])
    print("ContentTypeFilter created, allowing types (by extension): ['text/html', 'application/json']")
    print("Note: ContentTypeFilter in Crawl4AI works by checking URL file extensions, not HTTP headers.")

    # 2.2. Synthetic URLs for testing:
    # ContentTypeFilter checks URL extensions, so we provide URLs with different extensions.
    test_urls_content_type = [
        "https://example.com/page.html",  # Should pass: .html extension (text/html)
        "https://example.com/data.json",  # Should pass: .json extension (application/json)
        "https://example.com/image.png",  # Should reject: .png extension (not an allowed type)
        "https://example.com/document.pdf",  # Should reject: .pdf extension (not an allowed type)
        "https://example.com/page",  # Should pass: no extension (defaults to allow) - check the default behavior!
        "https://example.com/page.xhtml",  # Should pass: .xhtml extension (text/html)
    ]

    # 2.3. Apply the filter and show results:
    print("\n=== Testing ContentTypeFilter (URL extension based) ===")
    for url in test_urls_content_type:
        passed = content_type_filter.apply(url)
        result = "PASSED" if passed else "REJECTED"
        extension = ContentTypeFilter._extract_extension(url)  # Show the extracted extension for clarity
        print(f"- URL: {url} - {result} (Extension: '{extension or 'No Extension'}')")
    print("=" * 40)

    input("Press Enter to continue to the DomainFilter example...")

    # === 3. DomainFilter - Testing in Isolation ===
    print("\n" + "=" * 40)
    print("=== 3. DomainFilter - Testing in Isolation ===")
    print("=" * 40 + "\n")

    # 3.1. Create a DomainFilter:
    domain_filter = DomainFilter(allowed_domains=["crawl4ai.com", "example.com"])
    print("DomainFilter created, allowing domains: ['crawl4ai.com', 'example.com']")

    # 3.2. Synthetic URLs for testing:
    test_urls_domain = [
        "https://docs.crawl4ai.com/api",
        "https://example.com/products",
        "https://another-website.org/blog",
        "https://sub.example.com/about",
        "https://crawl4ai.com.attacker.net",  # Corrected example: should now be rejected
    ]

    # 3.3. Apply the filter and show results:
    print("\n=== Testing DomainFilter ===")
    for url in test_urls_domain:
        passed = domain_filter.apply(url)
        result = "PASSED" if passed else "REJECTED"
        print(f"- URL: {url} - {result}")
    print("=" * 40)

    input("Press Enter to continue to the FilterChain example...")

    # === 4. FilterChain - Combining Filters ===
    print("\n" + "=" * 40)
    print("=== 4. FilterChain - Combining Filters ===")
    print("=" * 40 + "\n")

    combined_filter = FilterChain(
        filters=[
            URLPatternFilter(patterns=["*api*"]),
            ContentTypeFilter(allowed_types=["text/html"]),  # Still URL extension based
            DomainFilter(allowed_domains=["docs.crawl4ai.com"]),
        ]
    )
    print("FilterChain created, combining URLPatternFilter, ContentTypeFilter, and DomainFilter.")

    test_urls_combined = [
        "https://docs.crawl4ai.com/api/async-webcrawler",
        "https://example.com/api/products",
        "https://docs.crawl4ai.com/core/crawling",
        "https://another-website.org/api/data",
    ]

    # 4.3. Apply the FilterChain and show results
    print("\n=== Testing FilterChain (URLPatternFilter + ContentTypeFilter + DomainFilter) ===")
    for url in test_urls_combined:
        passed = await combined_filter.apply(url)
        result = "PASSED" if passed else "REJECTED"
        print(f"- URL: {url} - {result}")
    print("=" * 40)

    input("Press Enter to continue to the Deep Crawl with FilterChain example...")

    # === 5. Deep Crawl with FilterChain ===
    print("\n" + "=" * 40)
    print("=== 5. Deep Crawl with FilterChain ===")
    print("=" * 40 + "\n")
    print("Finally, let's integrate the FilterChain into a deep crawl example.")

    config_final_crawl = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(
            max_depth=2,
            max_pages=10,
            include_external=False,
            filter_chain=combined_filter,
        ),
        verbose=False,
    )

    async with AsyncWebCrawler() as crawler:
        results_final_crawl: List[CrawlResult] = await crawler.arun(
            url="https://docs.crawl4ai.com", config=config_final_crawl
        )

    print("=== Crawled URLs (Deep Crawl with FilterChain) ===")
    for result in results_final_crawl:
        print(f"- {result.url}, Depth: {result.metadata.get('depth', 0)}")
    print("=" * 40)

    print("\nTutorial completed! Review the output of each section to understand URL filters.")


if __name__ == "__main__":
    asyncio.run(deep_crawl_filter_tutorial_part_2())
@@ -1,4 +1,4 @@
site_name: Crawl4AI Documentation (v0.5.x)
site_description: 🚀🤖 Crawl4AI, Open-source LLM-Friendly Web Crawler & Scraper
site_url: https://docs.crawl4ai.com
repo_url: https://github.com/unclecode/crawl4ai

@@ -36,12 +36,13 @@ dependencies = [
    "aiofiles",
    "rich>=13.9.4",
    "cssselect>=1.2.0",
    "httpx>=0.27.2",
    "fake-useragent>=2.0.3",
    "click>=8.1.7",
    "pyperclip>=1.8.2",
    "faust-cchardet>=2.1.19",
    "aiohttp>=3.11.11",
    "humanize>=4.10.0"
]
classifiers = [
    "Development Status :: 4 - Beta",

@@ -77,7 +78,7 @@ crawl4ai-download-models = "crawl4ai.model_loader:main"
crawl4ai-migrate = "crawl4ai.migrations:main"
crawl4ai-setup = "crawl4ai.install:post_install"
crawl4ai-doctor = "crawl4ai.install:doctor"
crwl = "crawl4ai.cli:main"

[tool.setuptools]
packages = {find = {where = ["."], include = ["crawl4ai*"]}}
@@ -1,7 +1,7 @@
 import os
 import asyncio
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.content_filter_strategy import LLMContentFilter

 async def test_llm_filter():
@@ -23,7 +23,7 @@ async def test_llm_filter():

     # Initialize LLM filter with focused instruction
     filter = LLMContentFilter(
-        llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
+        llm_config=LLMConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
         instruction="""
         Focus on extracting the core educational content about Python classes.
         Include:
@@ -43,7 +43,7 @@ async def test_llm_filter():
     )

     filter = LLMContentFilter(
-        llmConfig = LlmConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
+        llm_config = LLMConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
         chunk_token_threshold=2 ** 12 * 2, # 2048 * 2
         instruction="""
         Extract the main educational content while preserving its original wording and substance completely. Your task is to:
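The rename threaded through these hunks is mechanical: the class moves from `crawl4ai.async_configs.LlmConfig` to the top-level `crawl4ai.LLMConfig`, and the keyword argument changes from `llmConfig` to `llm_config`. A regex-based migration sketch illustrating the same rewrite (a hypothetical helper written for this note, not a tool shipped by the project):

```python
import re


def migrate_llm_config(source: str) -> str:
    """Apply the LlmConfig -> LLMConfig rename pattern seen in this commit."""
    # The old import path becomes a top-level import.
    source = re.sub(
        r"from crawl4ai\.async_configs import LlmConfig",
        "from crawl4ai import LLMConfig",
        source,
    )
    # The llmConfig keyword becomes llm_config, preserving any spacing around '='.
    source = re.sub(r"\bllmConfig\b(\s*=\s*)", r"llm_config\1", source)
    # Any remaining references to the class name are renamed.
    return source.replace("LlmConfig", "LLMConfig")


old = 'llmConfig = LlmConfig(provider="openai/gpt-4o")'
print(migrate_llm_config(old))
# prints: llm_config = LLMConfig(provider="openai/gpt-4o")
```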
@@ -7,7 +7,7 @@ import json
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(parent_dir)

-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.async_webcrawler import AsyncWebCrawler
 from crawl4ai.chunking_strategy import RegexChunking
 from crawl4ai.extraction_strategy import LLMExtractionStrategy
@@ -49,7 +49,7 @@ async def test_llm_extraction_strategy():
     async with AsyncWebCrawler(verbose=True) as crawler:
         url = "https://www.nbcnews.com/business"
         extraction_strategy = LLMExtractionStrategy(
-            llmConfig=LlmConfig(provider="openai/gpt-4o-mini",api_token=os.getenv("OPENAI_API_KEY")),
+            llm_config=LLMConfig(provider="openai/gpt-4o-mini",api_token=os.getenv("OPENAI_API_KEY")),
             instruction="Extract only content related to technology",
         )
         result = await crawler.arun(
tests/browser/test_launch_standalone.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+from crawl4ai.browser_profiler import BrowserProfiler
+import asyncio
+
+
+if __name__ == "__main__":
+    # Test launching a standalone browser
+    async def test_standalone_browser():
+        profiler = BrowserProfiler()
+        cdp_url = await profiler.launch_standalone_browser(
+            browser_type="chromium",
+            user_data_dir="~/.crawl4ai/browser_profile/test-browser-data",
+            debugging_port=9222,
+            headless=False
+        )
+        print(f"CDP URL: {cdp_url}")
+
+    asyncio.run(test_standalone_browser())
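The `user_data_dir` in the new test uses a tilde path; whether `launch_standalone_browser` expands it internally is not shown in this diff, so a caller may prefer to normalize it first. A minimal stdlib sketch (assumption: the library receives the path as-is):

```python
import os


def normalize_profile_dir(path: str) -> str:
    """Expand '~' and make a browser-profile directory path absolute."""
    return os.path.abspath(os.path.expanduser(path))


print(normalize_profile_dir("~/.crawl4ai/browser_profile/test-browser-data"))
```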
@@ -7,7 +7,7 @@ from crawl4ai import (
     BrowserConfig, CrawlerRunConfig, DefaultMarkdownGenerator,
     PruningContentFilter, JsonCssExtractionStrategy, LLMContentFilter, CacheMode
 )
-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.docker_client import Crawl4aiDockerClient

 class Crawl4AiTester:
@@ -143,7 +143,7 @@ async def test_with_client():
         cache_mode=CacheMode.BYPASS,
         markdown_generator=DefaultMarkdownGenerator(
             content_filter=LLMContentFilter(
-                llmConfig=LlmConfig(provider="openai/gpt-40"),
+                llm_config=LLMConfig(provider="openai/gpt-40"),
                 instruction="Extract key technical concepts"
             )
         ),
@@ -2,7 +2,7 @@ import inspect
 from typing import Any, Dict
 from enum import Enum

-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig

 def to_serializable_dict(obj: Any) -> Dict:
     """
@@ -224,7 +224,7 @@ if __name__ == "__main__":
     config3 = CrawlerRunConfig(
         markdown_generator=DefaultMarkdownGenerator(
             content_filter=LLMContentFilter(
-                llmConfig = LlmConfig(provider="openai/gpt-4"),
+                llm_config = LLMConfig(provider="openai/gpt-4"),
                 instruction="Extract key technical concepts",
                 chunk_token_threshold=2000,
                 overlap_rate=0.1
@@ -1,5 +1,5 @@
 import unittest, os
-from crawl4ai.async_configs import LlmConfig
+from crawl4ai import LLMConfig
 from crawl4ai.web_crawler import WebCrawler
 from crawl4ai.chunking_strategy import (
     RegexChunking,
@@ -43,7 +43,7 @@ class TestWebCrawler(unittest.TestCase):
             word_count_threshold=5,
             chunking_strategy=FixedLengthWordChunking(chunk_size=100),
             extraction_strategy=LLMExtractionStrategy(
-                llmConfig=LlmConfig(provider="openai/gpt-3.5-turbo", api_token=os.getenv("OPENAI_API_KEY"))
+                llm_config=LLMConfig(provider="openai/gpt-3.5-turbo", api_token=os.getenv("OPENAI_API_KEY"))
             ),
             bypass_cache=True,
         )