refactor(llm): rename LlmConfig to LLMConfig for consistency

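Rename LlmConfig to LLMConfig across the codebase to follow consistent naming conventions, and rename the corresponding `llmConfig` keyword argument (LLMExtractionStrategy, LLMContentFilter, generate_schema) to `llm_config`. Update all imports and usages to use the new names. Update documentation and examples to reflect the change. BREAKING CHANGE: LlmConfig has been renamed to LLMConfig, and the `llmConfig` keyword argument has been renamed to `llm_config`. Users need to update their imports and usage.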
2025-03-05 14:17:04 +08:00
parent e896c08f9c
commit baee4949d3
33 changed files with 362 additions and 174 deletions
--- a/crawl4ai/__init__.py
+++ b/crawl4ai/__init__.py
@@ -2,7 +2,8 @@
 import warnings

 from .async_webcrawler import AsyncWebCrawler, CacheMode
-from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig
+from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig
+
 from .content_scraping_strategy import (
    ContentScrapingStrategy,
    WebScrapingStrategy,
@@ -68,6 +69,7 @@ __all__ = [
    "AsyncLogger",
    "AsyncWebCrawler",
    "BrowserProfiler",
+    "LLMConfig",
    "DeepCrawlStrategy",
    "BFSDeepCrawlStrategy",
    "BestFirstCrawlingStrategy",
--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -13,13 +13,15 @@ from .config import (
 from .user_agent_generator import UAGen, ValidUAGenerator  # , OnlineUAGenerator
 from .extraction_strategy import ExtractionStrategy
 from .chunking_strategy import ChunkingStrategy, RegexChunking
+
 from .markdown_generation_strategy import MarkdownGenerationStrategy
 from .content_scraping_strategy import ContentScrapingStrategy, WebScrapingStrategy
 from .deep_crawling import DeepCrawlStrategy
-from typing import Union, List
+
 from .cache_context import CacheMode
 from .proxy_strategy import ProxyRotationStrategy

+from typing import Union, List
 import inspect
 from typing import Any, Dict, Optional
 from enum import Enum
@@ -1042,7 +1044,7 @@ class CrawlerRunConfig():
        return CrawlerRunConfig.from_kwargs(config_dict)


-class LlmConfig:
+class LLMConfig:
    def __init__(
        self,
        provider: str = DEFAULT_PROVIDER,
@@ -1063,8 +1065,8 @@ class LlmConfig:


    @staticmethod
-    def from_kwargs(kwargs: dict) -> "LlmConfig":
-        return LlmConfig(
+    def from_kwargs(kwargs: dict) -> "LLMConfig":
+        return LLMConfig(
            provider=kwargs.get("provider", DEFAULT_PROVIDER),
            api_token=kwargs.get("api_token"),
            base_url=kwargs.get("base_url"),
@@ -1084,8 +1086,8 @@ class LlmConfig:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
-            LLMConfig: A new instance with the specified updates
+            LLMConfig: A new instance with the specified updates
        """
        config_dict = self.to_dict()
        config_dict.update(kwargs)
-        return LlmConfig.from_kwargs(config_dict)
+        return LLMConfig.from_kwargs(config_dict)
--- a/crawl4ai/cli.py
+++ b/crawl4ai/cli.py
@@ -1,9 +1,7 @@
 import click
 import os
 import time
-import datetime
-import sys
-import shutil
+
 import humanize
 from typing import Dict, Any, Optional, List
 import json
@@ -13,7 +11,6 @@ from rich.console import Console
 from rich.table import Table
 from rich.panel import Panel
 from rich.prompt import Prompt, Confirm
-from rich.style import Style

 from crawl4ai import (
    CacheMode,
@@ -26,12 +23,12 @@ from crawl4ai import (
    JsonXPathExtractionStrategy,
    BM25ContentFilter, 
    PruningContentFilter,
-    BrowserProfiler
+    BrowserProfiler,
+    LLMConfig
 )
 from litellm import completion
 from pathlib import Path

-from crawl4ai.async_configs import LlmConfig

 # Initialize rich console
 console = Console()
@@ -647,7 +644,7 @@ def crawl_cmd(url: str, browser_config: str, crawler_config: str, filter_config:
                    raise click.ClickException("LLM provider and API token are required for LLM extraction")

                crawler_cfg.extraction_strategy = LLMExtractionStrategy(
-                    llmConfig=LlmConfig(provider=extract_conf["provider"], api_token=extract_conf["api_token"]),
+                    llm_config=LLMConfig(provider=extract_conf["provider"], api_token=extract_conf["api_token"]),
                    instruction=extract_conf["instruction"],
                    schema=schema_data,
                    **extract_conf.get("params", {})
--- a/crawl4ai/content_filter_strategy.py
+++ b/crawl4ai/content_filter_strategy.py
@@ -16,13 +16,13 @@ from .utils import (
    extract_xml_data,
    merge_chunks,
 )
+from .types import LLMConfig
+from .config import DEFAULT_PROVIDER, OVERLAP_RATE, WORD_TOKEN_RATE
 from abc import ABC, abstractmethod
 import math
 from snowballstemmer import stemmer
-from .config import DEFAULT_PROVIDER, OVERLAP_RATE, WORD_TOKEN_RATE, PROVIDER_MODELS
 from .models import TokenUsage
 from .prompts import PROMPT_FILTER_CONTENT
-import os
 import json
 import hashlib
 from pathlib import Path
@@ -770,37 +770,56 @@ class PruningContentFilter(RelevantContentFilter):


 class LLMContentFilter(RelevantContentFilter):
-    """Content filtering using LLMs to generate relevant markdown."""
+    """Content filtering using LLMs to generate relevant markdown.
+
+    How it works:
+    1. Extracts page metadata with fallbacks.
+    2. Extracts text chunks from the body element.
+    3. Applies LLMs to generate markdown for each chunk.
+    4. Filters out chunks below the threshold.
+    5. Sorts chunks by score in descending order.
+    6. Returns the top N chunks.
+
+    Attributes:
+        llm_config (LLMConfig): LLM configuration object.
+        instruction (str): Instruction for LLM markdown generation
+        chunk_token_threshold (int): Chunk token threshold for splitting (default: 1e9).
+        overlap_rate (float): Overlap rate for chunking (default: 0.5).
+        word_token_rate (float): Word token rate for chunking (default: 0.2).
+        verbose (bool): Enable verbose logging (default: False).
+        logger (AsyncLogger): Custom logger for LLM operations (optional).
+    """
    _UNWANTED_PROPS = {
-        'provider' : 'Instead, use llmConfig=LlmConfig(provider="...")',
-        'api_token' : 'Instead, use llmConfig=LlMConfig(api_token="...")',
-        'base_url' : 'Instead, use llmConfig=LlmConfig(base_url="...")',
-        'api_base' : 'Instead, use llmConfig=LlmConfig(base_url="...")',
+        'provider' : 'Instead, use llm_config=LLMConfig(provider="...")',
+        'api_token' : 'Instead, use llm_config=LLMConfig(api_token="...")',
+        'base_url' : 'Instead, use llm_config=LLMConfig(base_url="...")',
+        'api_base' : 'Instead, use llm_config=LLMConfig(base_url="...")',
    }

    def __init__(
        self,
-        provider: str = DEFAULT_PROVIDER,
-        api_token: Optional[str] = None,
-        llmConfig: "LlmConfig" = None,
+        llm_config: "LLMConfig" = None,
        instruction: str = None,
        chunk_token_threshold: int = int(1e9),
        overlap_rate: float = OVERLAP_RATE,
        word_token_rate: float = WORD_TOKEN_RATE,
-        base_url: Optional[str] = None,
-        api_base: Optional[str] = None,
-        extra_args: Dict = None,
        # char_token_rate: float = WORD_TOKEN_RATE * 5,
        # chunk_mode: str = "char",
        verbose: bool = False,
        logger: Optional[AsyncLogger] = None,
        ignore_cache: bool = True,
+        # Deprecated properties
+        provider: str = DEFAULT_PROVIDER,
+        api_token: Optional[str] = None,
+        base_url: Optional[str] = None,
+        api_base: Optional[str] = None,
+        extra_args: Dict = None,
    ):
        super().__init__(None)
        self.provider = provider
        self.api_token = api_token
        self.base_url = base_url or api_base
-        self.llmConfig = llmConfig
+        self.llm_config = llm_config
        self.instruction = instruction
        self.chunk_token_threshold = chunk_token_threshold
        self.overlap_rate = overlap_rate
@@ -872,7 +891,7 @@ class LLMContentFilter(RelevantContentFilter):
            self.logger.info(
                "Starting LLM markdown content filtering process",
                tag="LLM",
-                params={"provider": self.llmConfig.provider},
+                params={"provider": self.llm_config.provider},
                colors={"provider": Fore.CYAN},
            )

@@ -959,10 +978,10 @@ class LLMContentFilter(RelevantContentFilter):

                future = executor.submit(
                    _proceed_with_chunk,
-                    self.llmConfig.provider,
+                    self.llm_config.provider,
                    prompt,
-                    self.llmConfig.api_token,
-                    self.llmConfig.base_url,
+                    self.llm_config.api_token,
+                    self.llm_config.base_url,
                    self.extra_args,
                )
                futures.append((i, future))
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -4,12 +4,10 @@ from typing import Any, List, Dict, Optional
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import json
 import time
-import os

 from .prompts import PROMPT_EXTRACT_BLOCKS, PROMPT_EXTRACT_BLOCKS_WITH_INSTRUCTION, PROMPT_EXTRACT_SCHEMA_WITH_INSTRUCTION, JSON_SCHEMA_BUILDER_XPATH
 from .config import (
-    DEFAULT_PROVIDER, PROVIDER_MODELS, 
-    CHUNK_TOKEN_THRESHOLD,
+    DEFAULT_PROVIDER, CHUNK_TOKEN_THRESHOLD,
    OVERLAP_RATE,
    WORD_TOKEN_RATE,
 )
@@ -22,9 +20,7 @@ from .utils import (
    extract_xml_data,
    split_and_parse_json_objects,
    sanitize_input_encode,
-    chunk_documents,
    merge_chunks,
-    advanced_split,
 )
 from .models import * # noqa: F403

@@ -38,8 +34,9 @@ from .model_loader import (
    calculate_batch_size
 )

+from .types import LLMConfig
+
 from functools import partial
-import math
 import numpy as np
 import re
 from bs4 import BeautifulSoup
@@ -481,8 +478,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
    A strategy that uses an LLM to extract meaningful content from the HTML.

    Attributes:
-        provider: The provider to use for extraction. It follows the format <provider_name>/<model_name>, e.g., "ollama/llama3.3".
-        api_token: The API token for the provider.
+        llm_config: The LLM configuration object.
        instruction: The instruction to use for the LLM model.
        schema: Pydantic model schema for structured data.
        extraction_type: "block" or "schema".
@@ -490,27 +486,20 @@ class LLMExtractionStrategy(ExtractionStrategy):
        overlap_rate: Overlap between chunks.
        word_token_rate: Word to token conversion rate.
        apply_chunking: Whether to apply chunking.
-        base_url: The base URL for the API request.
-        api_base: The base URL for the API request.
-        extra_args: Additional arguments for the API request, such as temprature, max_tokens, etc.
        verbose: Whether to print verbose output.
        usages: List of individual token usages.
        total_usage: Accumulated token usage.
    """
    _UNWANTED_PROPS = {
-            'provider' : 'Instead, use llmConfig=LlmConfig(provider="...")',
-            'api_token' : 'Instead, use llmConfig=LlMConfig(api_token="...")',
-            'base_url' : 'Instead, use llmConfig=LlmConfig(base_url="...")',
-            'api_base' : 'Instead, use llmConfig=LlmConfig(base_url="...")',
+            'provider' : 'Instead, use llm_config=LLMConfig(provider="...")',
+            'api_token' : 'Instead, use llm_config=LLMConfig(api_token="...")',
+            'base_url' : 'Instead, use llm_config=LLMConfig(base_url="...")',
+            'api_base' : 'Instead, use llm_config=LLMConfig(base_url="...")',
        }
    def __init__(
        self,
-        llmConfig: 'LLMConfig' = None,
+        llm_config: 'LLMConfig' = None,
        instruction: str = None,
-        provider: str = DEFAULT_PROVIDER,
-        api_token: Optional[str] = None,
-        base_url: str = None,
-        api_base: str = None,
        schema: Dict = None,
        extraction_type="block",
        chunk_token_threshold=CHUNK_TOKEN_THRESHOLD,
@@ -519,15 +508,18 @@ class LLMExtractionStrategy(ExtractionStrategy):
        apply_chunking=True,
        input_format: str = "markdown",
        verbose=False,
+        # Deprecated arguments
+        provider: str = DEFAULT_PROVIDER,
+        api_token: Optional[str] = None,
+        base_url: str = None,
+        api_base: str = None,
        **kwargs,
    ):
        """
        Initialize the strategy with clustering parameters.

        Args:
-            llmConfig: The LLM configuration object.
-            provider: The provider to use for extraction. It follows the format <provider_name>/<model_name>, e.g., "ollama/llama3.3".
-            api_token: The API token for the provider.
+            llm_config: The LLM configuration object.
            instruction: The instruction to use for the LLM model.
            schema: Pydantic model schema for structured data.
            extraction_type: "block" or "schema".
@@ -535,20 +527,19 @@ class LLMExtractionStrategy(ExtractionStrategy):
            overlap_rate: Overlap between chunks.
            word_token_rate: Word to token conversion rate.
            apply_chunking: Whether to apply chunking.
-            base_url: The base URL for the API request.
-            api_base: The base URL for the API request.
-            extra_args: Additional arguments for the API request, such as temprature, max_tokens, etc.
            verbose: Whether to print verbose output.
            usages: List of individual token usages.
            total_usage: Accumulated token usage.

+            # Deprecated arguments, will be removed very soon
+            provider: The provider to use for extraction. It follows the format <provider_name>/<model_name>, e.g., "ollama/llama3.3".
+            api_token: The API token for the provider.
+            base_url: The base URL for the API request.
+            api_base: The base URL for the API request.
+            extra_args: Additional arguments for the API request, such as temperature, max_tokens, etc.
        """
        super().__init__( input_format=input_format, **kwargs)
-        self.llmConfig = llmConfig
-        self.provider = provider
-        self.api_token = api_token
-        self.base_url = base_url
-        self.api_base = api_base
+        self.llm_config = llm_config
        self.instruction = instruction
        self.extract_type = extraction_type
        self.schema = schema
@@ -565,6 +556,11 @@ class LLMExtractionStrategy(ExtractionStrategy):
        self.usages = []  # Store individual usages
        self.total_usage = TokenUsage()  # Accumulated usage

+        self.provider = provider
+        self.api_token = api_token
+        self.base_url = base_url
+        self.api_base = api_base
+
    
    def __setattr__(self, name, value):
        """Handle attribute setting."""
@@ -618,10 +614,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
            )

        response = perform_completion_with_backoff(
-            self.llmConfig.provider,
+            self.llm_config.provider,
            prompt_with_variables,
-            self.llmConfig.api_token,
-            base_url=self.llmConfig.base_url,
+            self.llm_config.api_token,
+            base_url=self.llm_config.base_url,
            extra_args=self.extra_args,
        )  # , json_response=self.extract_type == "schema")
        # Track usage
@@ -701,7 +697,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
            overlap=int(self.chunk_token_threshold * self.overlap_rate),
        )
        extracted_content = []
-        if self.llmConfig.provider.startswith("groq/"):
+        if self.llm_config.provider.startswith("groq/"):
            # Sequential processing with a delay
            for ix, section in enumerate(merged_sections):
                extract_func = partial(self.extract, url)
@@ -1043,8 +1039,8 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
        pass

    _GENERATE_SCHEMA_UNWANTED_PROPS = {
-        'provider': 'Instead, use llmConfig=LlmConfig(provider="...")',
-        'api_token': 'Instead, use llmConfig=LlMConfig(api_token="...")',
+        'provider': 'Instead, use llm_config=LLMConfig(provider="...")',
+        'api_token': 'Instead, use llm_config=LLMConfig(api_token="...")',
    }

    @staticmethod
@@ -1053,7 +1049,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
        schema_type: str = "CSS", # or XPATH
        query: str = None,
        target_json_example: str = None,
-        llmConfig: 'LLMConfig' = None,
+        llm_config: 'LLMConfig' = None,
        provider: str = None,
        api_token: str = None,
        **kwargs
@@ -1066,7 +1062,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
            query (str, optional): Natural language description of what data to extract
            provider (str): Legacy Parameter. LLM provider to use 
            api_token (str): Legacy Parameter. API token for LLM provider
-            llmConfig (LlmConfig): LLM configuration object
+            llm_config (LLMConfig): LLM configuration object
            prompt (str, optional): Custom prompt template to use
            **kwargs: Additional args passed to perform_completion_with_backoff
            
@@ -1130,10 +1126,10 @@ In this scenario, use your best judgment to generate the schema. Try to maximize
        try:
            # Call LLM with backoff handling
            response = perform_completion_with_backoff(
-                provider=llmConfig.provider,
+                provider=llm_config.provider,
                prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]),
                json_response = True,                
-                api_token=llmConfig.api_token,
+                api_token=llm_config.api_token,
                **kwargs
            )
            
--- a/crawl4ai/markdown_generation_strategy.py
+++ b/crawl4ai/markdown_generation_strategy.py
@@ -1,9 +1,9 @@
 from abc import ABC, abstractmethod
-from tabnanny import verbose
 from typing import Optional, Dict, Any, Tuple
 from .models import MarkdownGenerationResult
 from .html2text import CustomHTML2Text
-from .content_filter_strategy import RelevantContentFilter
+from .types import RelevantContentFilter
+# from .content_filter_strategy import RelevantContentFilter
 import re
 from urllib.parse import urljoin

--- a/crawl4ai/types.py
+++ b/crawl4ai/types.py
@@ -1,14 +1,181 @@
 from typing import TYPE_CHECKING, Union

-AsyncWebCrawler = Union['AsyncWebCrawlerType']  # Note the string literal
-CrawlerRunConfig = Union['CrawlerRunConfigType']
+# Logger types
+AsyncLoggerBase = Union['AsyncLoggerBaseType']
+AsyncLogger = Union['AsyncLoggerType']
+
+# Crawler core types
+AsyncWebCrawler = Union['AsyncWebCrawlerType']
+CacheMode = Union['CacheModeType']
 CrawlResult = Union['CrawlResultType']
+CrawlerHub = Union['CrawlerHubType']
+BrowserProfiler = Union['BrowserProfilerType']
+
+# Configuration types
+BrowserConfig = Union['BrowserConfigType']
+CrawlerRunConfig = Union['CrawlerRunConfigType']
+HTTPCrawlerConfig = Union['HTTPCrawlerConfigType']
+LLMConfig = Union['LLMConfigType']
+
+# Content scraping types
+ContentScrapingStrategy = Union['ContentScrapingStrategyType']
+WebScrapingStrategy = Union['WebScrapingStrategyType']
+LXMLWebScrapingStrategy = Union['LXMLWebScrapingStrategyType']
+
+# Proxy types
+ProxyRotationStrategy = Union['ProxyRotationStrategyType']
+RoundRobinProxyStrategy = Union['RoundRobinProxyStrategyType']
+
+# Extraction types
+ExtractionStrategy = Union['ExtractionStrategyType']
+LLMExtractionStrategy = Union['LLMExtractionStrategyType']
+CosineStrategy = Union['CosineStrategyType']
+JsonCssExtractionStrategy = Union['JsonCssExtractionStrategyType']
+JsonXPathExtractionStrategy = Union['JsonXPathExtractionStrategyType']
+
+# Chunking types
+ChunkingStrategy = Union['ChunkingStrategyType']
+RegexChunking = Union['RegexChunkingType']
+
+# Markdown generation types
+DefaultMarkdownGenerator = Union['DefaultMarkdownGeneratorType']
+MarkdownGenerationResult = Union['MarkdownGenerationResultType']
+
+# Content filter types
+RelevantContentFilter = Union['RelevantContentFilterType']
+PruningContentFilter = Union['PruningContentFilterType']
+BM25ContentFilter = Union['BM25ContentFilterType']
+LLMContentFilter = Union['LLMContentFilterType']
+
+# Dispatcher types
+BaseDispatcher = Union['BaseDispatcherType']
+MemoryAdaptiveDispatcher = Union['MemoryAdaptiveDispatcherType']
+SemaphoreDispatcher = Union['SemaphoreDispatcherType']
+RateLimiter = Union['RateLimiterType']
+CrawlerMonitor = Union['CrawlerMonitorType']
+DisplayMode = Union['DisplayModeType']
 RunManyReturn = Union['RunManyReturnType']

+# Docker client
+Crawl4aiDockerClient = Union['Crawl4aiDockerClientType']
+
+# Deep crawling types
+DeepCrawlStrategy = Union['DeepCrawlStrategyType']
+BFSDeepCrawlStrategy = Union['BFSDeepCrawlStrategyType']
+FilterChain = Union['FilterChainType']
+ContentTypeFilter = Union['ContentTypeFilterType']
+DomainFilter = Union['DomainFilterType']
+URLFilter = Union['URLFilterType']
+FilterStats = Union['FilterStatsType']
+SEOFilter = Union['SEOFilterType']
+KeywordRelevanceScorer = Union['KeywordRelevanceScorerType']
+URLScorer = Union['URLScorerType']
+CompositeScorer = Union['CompositeScorerType']
+DomainAuthorityScorer = Union['DomainAuthorityScorerType']
+FreshnessScorer = Union['FreshnessScorerType']
+PathDepthScorer = Union['PathDepthScorerType']
+BestFirstCrawlingStrategy = Union['BestFirstCrawlingStrategyType']
+DFSDeepCrawlStrategy = Union['DFSDeepCrawlStrategyType']
+DeepCrawlDecorator = Union['DeepCrawlDecoratorType']
+
+# Only import types during type checking to avoid circular imports
 if TYPE_CHECKING:
-    from . import (
+    # Logger imports
+    from .async_logger import (
+        AsyncLoggerBase as AsyncLoggerBaseType,
+        AsyncLogger as AsyncLoggerType,
+    )
+    
+    # Crawler core imports
+    from .async_webcrawler import (
        AsyncWebCrawler as AsyncWebCrawlerType,
+        CacheMode as CacheModeType,
+    )
+    from .models import CrawlResult as CrawlResultType
+    from .hub import CrawlerHub as CrawlerHubType
+    from .browser_profiler import BrowserProfiler as BrowserProfilerType
+    
+    # Configuration imports
+    from .async_configs import (
+        BrowserConfig as BrowserConfigType,
        CrawlerRunConfig as CrawlerRunConfigType,
-        CrawlResult as CrawlResultType,
+        HTTPCrawlerConfig as HTTPCrawlerConfigType,
+        LLMConfig as LLMConfigType,
+    )
+    
+    # Content scraping imports
+    from .content_scraping_strategy import (
+        ContentScrapingStrategy as ContentScrapingStrategyType,
+        WebScrapingStrategy as WebScrapingStrategyType,
+        LXMLWebScrapingStrategy as LXMLWebScrapingStrategyType,
+    )
+    
+    # Proxy imports
+    from .proxy_strategy import (
+        ProxyRotationStrategy as ProxyRotationStrategyType,
+        RoundRobinProxyStrategy as RoundRobinProxyStrategyType,
+    )
+    
+    # Extraction imports
+    from .extraction_strategy import (
+        ExtractionStrategy as ExtractionStrategyType,
+        LLMExtractionStrategy as LLMExtractionStrategyType,
+        CosineStrategy as CosineStrategyType,
+        JsonCssExtractionStrategy as JsonCssExtractionStrategyType,
+        JsonXPathExtractionStrategy as JsonXPathExtractionStrategyType,
+    )
+    
+    # Chunking imports
+    from .chunking_strategy import (
+        ChunkingStrategy as ChunkingStrategyType,
+        RegexChunking as RegexChunkingType,
+    )
+    
+    # Markdown generation imports
+    from .markdown_generation_strategy import (
+        DefaultMarkdownGenerator as DefaultMarkdownGeneratorType,
+    )
+    from .models import MarkdownGenerationResult as MarkdownGenerationResultType
+    
+    # Content filter imports
+    from .content_filter_strategy import (
+        RelevantContentFilter as RelevantContentFilterType,
+        PruningContentFilter as PruningContentFilterType,
+        BM25ContentFilter as BM25ContentFilterType,
+        LLMContentFilter as LLMContentFilterType,
+    )
+    
+    # Dispatcher imports
+    from .async_dispatcher import (
+        BaseDispatcher as BaseDispatcherType,
+        MemoryAdaptiveDispatcher as MemoryAdaptiveDispatcherType,
+        SemaphoreDispatcher as SemaphoreDispatcherType,
+        RateLimiter as RateLimiterType,
+        CrawlerMonitor as CrawlerMonitorType,
+        DisplayMode as DisplayModeType,
        RunManyReturn as RunManyReturnType,
+    )
+    
+    # Docker client
+    from .docker_client import Crawl4aiDockerClient as Crawl4aiDockerClientType
+    
+    # Deep crawling imports
+    from .deep_crawling import (
+        DeepCrawlStrategy as DeepCrawlStrategyType,
+        BFSDeepCrawlStrategy as BFSDeepCrawlStrategyType,
+        FilterChain as FilterChainType,
+        ContentTypeFilter as ContentTypeFilterType,
+        DomainFilter as DomainFilterType,
+        URLFilter as URLFilterType,
+        FilterStats as FilterStatsType,
+        SEOFilter as SEOFilterType,
+        KeywordRelevanceScorer as KeywordRelevanceScorerType,
+        URLScorer as URLScorerType,
+        CompositeScorer as CompositeScorerType,
+        DomainAuthorityScorer as DomainAuthorityScorerType,
+        FreshnessScorer as FreshnessScorerType,
+        PathDepthScorer as PathDepthScorerType,
+        BestFirstCrawlingStrategy as BestFirstCrawlingStrategyType,
+        DFSDeepCrawlStrategy as DFSDeepCrawlStrategyType,
+        DeepCrawlDecorator as DeepCrawlDecoratorType,
    )