refactor(link_extractor): rename link_extractor module to link_preview

This change renames the link_extractor module to link_preview, streamlining the codebase. Replacing the old module removes 395 lines of code, reducing complexity and improving maintainability. All dependent files have been updated to reflect the new name, ensuring consistency across the project.

BREAKING CHANGE: The link_extractor module has been renamed to link_preview, and LinkExtractor/LinkExtractionConfig are now LinkPreview/LinkPreviewConfig. Update imports and configuration parameter names accordingly.
This commit is contained in:
UncleCode
2025-06-27 21:54:22 +08:00
parent 5c9c305dbf
commit 539a324cf6
7 changed files with 71 additions and 71 deletions

View File

@@ -37,7 +37,7 @@ from .content_filter_strategy import (
) )
from .models import CrawlResult, MarkdownGenerationResult, DisplayMode from .models import CrawlResult, MarkdownGenerationResult, DisplayMode
from .components.crawler_monitor import CrawlerMonitor from .components.crawler_monitor import CrawlerMonitor
from .link_extractor import LinkExtractor from .link_preview import LinkPreview
from .async_dispatcher import ( from .async_dispatcher import (
MemoryAdaptiveDispatcher, MemoryAdaptiveDispatcher,
SemaphoreDispatcher, SemaphoreDispatcher,
@@ -142,7 +142,7 @@ __all__ = [
"SemaphoreDispatcher", "SemaphoreDispatcher",
"RateLimiter", "RateLimiter",
"CrawlerMonitor", "CrawlerMonitor",
"LinkExtractor", "LinkPreview",
"DisplayMode", "DisplayMode",
"MarkdownGenerationResult", "MarkdownGenerationResult",
"Crawl4aiDockerClient", "Crawl4aiDockerClient",

View File

@@ -594,7 +594,7 @@ class BrowserConfig:
return config return config
return BrowserConfig.from_kwargs(config) return BrowserConfig.from_kwargs(config)
class LinkExtractionConfig: class LinkPreviewConfig:
"""Configuration for link head extraction and scoring.""" """Configuration for link head extraction and scoring."""
def __init__( def __init__(
@@ -649,12 +649,12 @@ class LinkExtractionConfig:
raise ValueError("At least one of include_internal or include_external must be True") raise ValueError("At least one of include_internal or include_external must be True")
@staticmethod @staticmethod
def from_dict(config_dict: Dict[str, Any]) -> "LinkExtractionConfig": def from_dict(config_dict: Dict[str, Any]) -> "LinkPreviewConfig":
"""Create LinkExtractionConfig from dictionary (for backward compatibility).""" """Create LinkPreviewConfig from dictionary (for backward compatibility)."""
if not config_dict: if not config_dict:
return None return None
return LinkExtractionConfig( return LinkPreviewConfig(
include_internal=config_dict.get("include_internal", True), include_internal=config_dict.get("include_internal", True),
include_external=config_dict.get("include_external", False), include_external=config_dict.get("include_external", False),
include_patterns=config_dict.get("include_patterns"), include_patterns=config_dict.get("include_patterns"),
@@ -682,11 +682,11 @@ class LinkExtractionConfig:
"verbose": self.verbose "verbose": self.verbose
} }
def clone(self, **kwargs) -> "LinkExtractionConfig": def clone(self, **kwargs) -> "LinkPreviewConfig":
"""Create a copy with updated values.""" """Create a copy with updated values."""
config_dict = self.to_dict() config_dict = self.to_dict()
config_dict.update(kwargs) config_dict.update(kwargs)
return LinkExtractionConfig.from_dict(config_dict) return LinkPreviewConfig.from_dict(config_dict)
class HTTPCrawlerConfig: class HTTPCrawlerConfig:
@@ -925,7 +925,7 @@ class CrawlerRunConfig():
exclude_internal_links (bool): If True, exclude internal links from the results. exclude_internal_links (bool): If True, exclude internal links from the results.
Default: False. Default: False.
score_links (bool): If True, calculate intrinsic quality scores for all links using URL structure, score_links (bool): If True, calculate intrinsic quality scores for all links using URL structure,
text quality, and contextual relevance metrics. Separate from link_extraction_config. text quality, and contextual relevance metrics. Separate from link_preview_config.
Default: False. Default: False.
# Debugging and Logging Parameters # Debugging and Logging Parameters
@@ -1055,7 +1055,7 @@ class CrawlerRunConfig():
# Deep Crawl Parameters # Deep Crawl Parameters
deep_crawl_strategy: Optional[DeepCrawlStrategy] = None, deep_crawl_strategy: Optional[DeepCrawlStrategy] = None,
# Link Extraction Parameters # Link Extraction Parameters
link_extraction_config: Union[LinkExtractionConfig, Dict[str, Any]] = None, link_preview_config: Union[LinkPreviewConfig, Dict[str, Any]] = None,
# Experimental Parameters # Experimental Parameters
experimental: Dict[str, Any] = None, experimental: Dict[str, Any] = None,
): ):
@@ -1187,15 +1187,15 @@ class CrawlerRunConfig():
self.deep_crawl_strategy = deep_crawl_strategy self.deep_crawl_strategy = deep_crawl_strategy
# Link Extraction Parameters # Link Extraction Parameters
if link_extraction_config is None: if link_preview_config is None:
self.link_extraction_config = None self.link_preview_config = None
elif isinstance(link_extraction_config, LinkExtractionConfig): elif isinstance(link_preview_config, LinkPreviewConfig):
self.link_extraction_config = link_extraction_config self.link_preview_config = link_preview_config
elif isinstance(link_extraction_config, dict): elif isinstance(link_preview_config, dict):
# Convert dict to config object for backward compatibility # Convert dict to config object for backward compatibility
self.link_extraction_config = LinkExtractionConfig.from_dict(link_extraction_config) self.link_preview_config = LinkPreviewConfig.from_dict(link_preview_config)
else: else:
raise ValueError("link_extraction_config must be LinkExtractionConfig object or dict") raise ValueError("link_preview_config must be LinkPreviewConfig object or dict")
# Experimental Parameters # Experimental Parameters
self.experimental = experimental or {} self.experimental = experimental or {}
@@ -1371,7 +1371,7 @@ class CrawlerRunConfig():
# Deep Crawl Parameters # Deep Crawl Parameters
deep_crawl_strategy=kwargs.get("deep_crawl_strategy"), deep_crawl_strategy=kwargs.get("deep_crawl_strategy"),
# Link Extraction Parameters # Link Extraction Parameters
link_extraction_config=kwargs.get("link_extraction_config"), link_preview_config=kwargs.get("link_preview_config"),
url=kwargs.get("url"), url=kwargs.get("url"),
# Experimental Parameters # Experimental Parameters
experimental=kwargs.get("experimental"), experimental=kwargs.get("experimental"),
@@ -1467,7 +1467,7 @@ class CrawlerRunConfig():
"user_agent_mode": self.user_agent_mode, "user_agent_mode": self.user_agent_mode,
"user_agent_generator_config": self.user_agent_generator_config, "user_agent_generator_config": self.user_agent_generator_config,
"deep_crawl_strategy": self.deep_crawl_strategy, "deep_crawl_strategy": self.deep_crawl_strategy,
"link_extraction_config": self.link_extraction_config.to_dict() if self.link_extraction_config else None, "link_preview_config": self.link_preview_config.to_dict() if self.link_preview_config else None,
"url": self.url, "url": self.url,
"experimental": self.experimental, "experimental": self.experimental,
} }

View File

@@ -948,14 +948,14 @@ class WebScrapingStrategy(ContentScrapingStrategy):
links["external"] = list(external_links_dict.values()) links["external"] = list(external_links_dict.values())
# Extract head content for links if configured # Extract head content for links if configured
link_extraction_config = kwargs.get("link_extraction_config") link_preview_config = kwargs.get("link_preview_config")
if link_extraction_config is not None: if link_preview_config is not None:
try: try:
import asyncio import asyncio
from .link_extractor import LinkExtractor from .link_preview import LinkPreview
from .models import Links, Link from .models import Links, Link
verbose = link_extraction_config.verbose verbose = link_preview_config.verbose
if verbose: if verbose:
self._log("info", "Starting link head extraction for {internal} internal and {external} external links", self._log("info", "Starting link head extraction for {internal} internal and {external} external links",
@@ -966,17 +966,17 @@ class WebScrapingStrategy(ContentScrapingStrategy):
external_links = [Link(**link_data) for link_data in links["external"]] external_links = [Link(**link_data) for link_data in links["external"]]
links_obj = Links(internal=internal_links, external=external_links) links_obj = Links(internal=internal_links, external=external_links)
# Create a config object for LinkExtractor # Create a config object for LinkPreview
class TempCrawlerRunConfig: class TempCrawlerRunConfig:
def __init__(self, link_config, score_links): def __init__(self, link_config, score_links):
self.link_extraction_config = link_config self.link_preview_config = link_config
self.score_links = score_links self.score_links = score_links
config = TempCrawlerRunConfig(link_extraction_config, kwargs.get("score_links", False)) config = TempCrawlerRunConfig(link_preview_config, kwargs.get("score_links", False))
# Extract head content (run async operation in sync context) # Extract head content (run async operation in sync context)
async def extract_links(): async def extract_links():
async with LinkExtractor(self.logger) as extractor: async with LinkPreview(self.logger) as extractor:
return await extractor.extract_link_heads(links_obj, config) return await extractor.extract_link_heads(links_obj, config)
# Run the async operation # Run the async operation
@@ -1740,21 +1740,21 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
with_tail=False, with_tail=False,
).strip() ).strip()
# Create links dictionary in the format expected by LinkExtractor # Create links dictionary in the format expected by LinkPreview
links = { links = {
"internal": list(internal_links_dict.values()), "internal": list(internal_links_dict.values()),
"external": list(external_links_dict.values()), "external": list(external_links_dict.values()),
} }
# Extract head content for links if configured # Extract head content for links if configured
link_extraction_config = kwargs.get("link_extraction_config") link_preview_config = kwargs.get("link_preview_config")
if link_extraction_config is not None: if link_preview_config is not None:
try: try:
import asyncio import asyncio
from .link_extractor import LinkExtractor from .link_preview import LinkPreview
from .models import Links, Link from .models import Links, Link
verbose = link_extraction_config.verbose verbose = link_preview_config.verbose
if verbose: if verbose:
self._log("info", "Starting link head extraction for {internal} internal and {external} external links", self._log("info", "Starting link head extraction for {internal} internal and {external} external links",
@@ -1765,17 +1765,17 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
external_links = [Link(**link_data) for link_data in links["external"]] external_links = [Link(**link_data) for link_data in links["external"]]
links_obj = Links(internal=internal_links, external=external_links) links_obj = Links(internal=internal_links, external=external_links)
# Create a config object for LinkExtractor # Create a config object for LinkPreview
class TempCrawlerRunConfig: class TempCrawlerRunConfig:
def __init__(self, link_config, score_links): def __init__(self, link_config, score_links):
self.link_extraction_config = link_config self.link_preview_config = link_config
self.score_links = score_links self.score_links = score_links
config = TempCrawlerRunConfig(link_extraction_config, kwargs.get("score_links", False)) config = TempCrawlerRunConfig(link_preview_config, kwargs.get("score_links", False))
# Extract head content (run async operation in sync context) # Extract head content (run async operation in sync context)
async def extract_links(): async def extract_links():
async with LinkExtractor(self.logger) as extractor: async with LinkPreview(self.logger) as extractor:
return await extractor.extract_link_heads(links_obj, config) return await extractor.extract_link_heads(links_obj, config)
# Run the async operation # Run the async operation

View File

@@ -15,7 +15,7 @@ from .models import Links, Link
from .utils import calculate_total_score from .utils import calculate_total_score
class LinkExtractor: class LinkPreview:
""" """
Extracts head content from links using URLSeeder's parallel processing infrastructure. Extracts head content from links using URLSeeder's parallel processing infrastructure.
@@ -29,7 +29,7 @@ class LinkExtractor:
def __init__(self, logger: Optional[AsyncLogger] = None): def __init__(self, logger: Optional[AsyncLogger] = None):
""" """
Initialize the LinkExtractor. Initialize the LinkPreview.
Args: Args:
logger: Optional logger instance for recording events logger: Optional logger instance for recording events
@@ -78,12 +78,12 @@ class LinkExtractor:
Args: Args:
links: Links object containing internal and external links links: Links object containing internal and external links
config: CrawlerRunConfig with link_extraction_config settings config: CrawlerRunConfig with link_preview_config settings
Returns: Returns:
Links object with head_data attached to filtered Link objects Links object with head_data attached to filtered Link objects
""" """
link_config = config.link_extraction_config link_config = config.link_preview_config
# Ensure seeder is initialized # Ensure seeder is initialized
await self.start() await self.start()
@@ -331,7 +331,7 @@ class LinkExtractor:
intrinsic_score=updated_link.intrinsic_score, intrinsic_score=updated_link.intrinsic_score,
contextual_score=updated_link.contextual_score, contextual_score=updated_link.contextual_score,
score_links_enabled=getattr(config, 'score_links', False), score_links_enabled=getattr(config, 'score_links', False),
query_provided=bool(config.link_extraction_config.query) query_provided=bool(config.link_preview_config.query)
) )
updated_internal.append(updated_link) updated_internal.append(updated_link)
@@ -369,7 +369,7 @@ class LinkExtractor:
intrinsic_score=updated_link.intrinsic_score, intrinsic_score=updated_link.intrinsic_score,
contextual_score=updated_link.contextual_score, contextual_score=updated_link.contextual_score,
score_links_enabled=getattr(config, 'score_links', False), score_links_enabled=getattr(config, 'score_links', False),
query_provided=bool(config.link_extraction_config.query) query_provided=bool(config.link_preview_config.query)
) )
updated_external.append(updated_link) updated_external.append(updated_link)

View File

@@ -18,7 +18,7 @@ Usage:
import asyncio import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.async_configs import LinkExtractionConfig from crawl4ai.async_configs import LinkPreviewConfig
async def basic_link_head_extraction(): async def basic_link_head_extraction():
@@ -30,7 +30,7 @@ async def basic_link_head_extraction():
config = CrawlerRunConfig( config = CrawlerRunConfig(
# Enable link head extraction # Enable link head extraction
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, # Process internal links include_internal=True, # Process internal links
include_external=False, # Skip external links for this demo include_external=False, # Skip external links for this demo
max_links=5, # Limit to 5 links max_links=5, # Limit to 5 links
@@ -94,7 +94,7 @@ async def research_assistant_example():
print("=" * 50) print("=" * 50)
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
include_external=True, include_external=True,
include_patterns=["*/docs/*", "*/tutorial/*", "*/guide/*"], include_patterns=["*/docs/*", "*/tutorial/*", "*/guide/*"],
@@ -149,7 +149,7 @@ async def api_discovery_example():
print("=" * 50) print("=" * 50)
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
include_patterns=["*/api/*", "*/reference/*", "*/endpoint/*"], include_patterns=["*/api/*", "*/reference/*", "*/endpoint/*"],
exclude_patterns=["*/deprecated/*", "*/v1/*"], # Skip old versions exclude_patterns=["*/deprecated/*", "*/v1/*"], # Skip old versions
@@ -214,7 +214,7 @@ async def link_quality_analysis():
print("=" * 50) print("=" * 50)
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
max_links=30, # Analyze more links for better statistics max_links=30, # Analyze more links for better statistics
concurrency=15, concurrency=15,
@@ -281,7 +281,7 @@ async def pattern_filtering_example():
filters = [ filters = [
{ {
"name": "Documentation Only", "name": "Documentation Only",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=True, include_internal=True,
max_links=10, max_links=10,
concurrency=5, concurrency=5,
@@ -292,7 +292,7 @@ async def pattern_filtering_example():
}, },
{ {
"name": "API References Only", "name": "API References Only",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=True, include_internal=True,
max_links=10, max_links=10,
concurrency=5, concurrency=5,
@@ -303,7 +303,7 @@ async def pattern_filtering_example():
}, },
{ {
"name": "Exclude Admin Areas", "name": "Exclude Admin Areas",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=True, include_internal=True,
max_links=10, max_links=10,
concurrency=5, concurrency=5,
@@ -318,7 +318,7 @@ async def pattern_filtering_example():
print(f"\n🔍 Testing: {filter_example['name']}") print(f"\n🔍 Testing: {filter_example['name']}")
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=filter_example['config'], link_preview_config=filter_example['config'],
score_links=True score_links=True
) )

View File

@@ -125,7 +125,7 @@ Here's a full example you can copy, paste, and run immediately:
```python ```python
import asyncio import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.async_configs import LinkExtractionConfig from crawl4ai.async_configs import LinkPreviewConfig
async def extract_link_heads_example(): async def extract_link_heads_example():
""" """
@@ -136,7 +136,7 @@ async def extract_link_heads_example():
# Configure link head extraction # Configure link head extraction
config = CrawlerRunConfig( config = CrawlerRunConfig(
# Enable link head extraction with detailed configuration # Enable link head extraction with detailed configuration
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, # Extract from internal links include_internal=True, # Extract from internal links
include_external=False, # Skip external links for this example include_external=False, # Skip external links for this example
max_links=10, # Limit to 10 links for demo max_links=10, # Limit to 10 links for demo
@@ -234,12 +234,12 @@ if __name__ == "__main__":
### 2.3 Configuration Deep Dive ### 2.3 Configuration Deep Dive
The `LinkExtractionConfig` class supports these options: The `LinkPreviewConfig` class supports these options:
```python ```python
from crawl4ai.async_configs import LinkExtractionConfig from crawl4ai.async_configs import LinkPreviewConfig
link_extraction_config = LinkExtractionConfig( link_preview_config = LinkPreviewConfig(
# BASIC SETTINGS # BASIC SETTINGS
verbose=True, # Show detailed logs (recommended for learning) verbose=True, # Show detailed logs (recommended for learning)
@@ -316,7 +316,7 @@ Find the most relevant documentation pages:
```python ```python
async def research_assistant(): async def research_assistant():
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
include_external=True, include_external=True,
include_patterns=["*/docs/*", "*/tutorial/*", "*/guide/*"], include_patterns=["*/docs/*", "*/tutorial/*", "*/guide/*"],
@@ -348,7 +348,7 @@ Find all API endpoints and references:
```python ```python
async def api_discovery(): async def api_discovery():
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
include_patterns=["*/api/*", "*/reference/*"], include_patterns=["*/api/*", "*/reference/*"],
exclude_patterns=["*/deprecated/*"], exclude_patterns=["*/deprecated/*"],
@@ -387,7 +387,7 @@ Analyze website structure and content quality:
```python ```python
async def quality_analysis(): async def quality_analysis():
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
max_links=200, max_links=200,
concurrency=20, concurrency=20,
@@ -434,7 +434,7 @@ async def quality_analysis():
```python ```python
# Check your configuration: # Check your configuration:
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
verbose=True # ← Enable to see what's happening verbose=True # ← Enable to see what's happening
) )
) )
@@ -445,7 +445,7 @@ config = CrawlerRunConfig(
# Make sure scoring is enabled: # Make sure scoring is enabled:
config = CrawlerRunConfig( config = CrawlerRunConfig(
score_links=True, # ← Enable intrinsic scoring score_links=True, # ← Enable intrinsic scoring
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
query="your search terms" # ← For contextual scoring query="your search terms" # ← For contextual scoring
) )
) )
@@ -454,7 +454,7 @@ config = CrawlerRunConfig(
**Process taking too long?** **Process taking too long?**
```python ```python
# Optimize performance: # Optimize performance:
link_extraction_config = LinkExtractionConfig( link_preview_config = LinkPreviewConfig(
max_links=20, # ← Reduce number max_links=20, # ← Reduce number
concurrency=10, # ← Increase parallelism concurrency=10, # ← Increase parallelism
timeout=3, # ← Shorter timeout timeout=3, # ← Shorter timeout

View File

@@ -5,7 +5,7 @@ Test script for Link Extractor functionality
from crawl4ai.models import Link from crawl4ai.models import Link
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.async_configs import LinkExtractionConfig from crawl4ai.async_configs import LinkPreviewConfig
import asyncio import asyncio
import sys import sys
import os import os
@@ -22,7 +22,7 @@ async def test_link_extractor():
# Test configuration with link extraction AND scoring enabled # Test configuration with link extraction AND scoring enabled
config = CrawlerRunConfig( config = CrawlerRunConfig(
link_extraction_config=LinkExtractionConfig( link_preview_config=LinkPreviewConfig(
include_internal=True, include_internal=True,
include_external=False, # Only internal links for this test include_external=False, # Only internal links for this test
# No include/exclude patterns for first test - let's see what we get # No include/exclude patterns for first test - let's see what we get
@@ -53,7 +53,7 @@ async def test_link_extractor():
result = await crawler.arun(url, config=config) result = await crawler.arun(url, config=config)
# Debug: Check if link extraction config is being passed # Debug: Check if link extraction config is being passed
print(f"🔍 Debug - Link extraction config: {config.link_extraction_config.to_dict() if config.link_extraction_config else None}") print(f"🔍 Debug - Link extraction config: {config.link_preview_config.to_dict() if config.link_preview_config else None}")
print(f"🔍 Debug - Score links: {config.score_links}") print(f"🔍 Debug - Score links: {config.score_links}")
if result.success: if result.success:
@@ -187,7 +187,7 @@ def test_config_examples():
examples = [ examples = [
{ {
"name": "BM25 Scored Documentation Links", "name": "BM25 Scored Documentation Links",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=True, include_internal=True,
include_external=False, include_external=False,
include_patterns=["*/docs/*", "*/api/*", "*/reference/*"], include_patterns=["*/docs/*", "*/api/*", "*/reference/*"],
@@ -199,7 +199,7 @@ def test_config_examples():
}, },
{ {
"name": "Internal Links Only", "name": "Internal Links Only",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=True, include_internal=True,
include_external=False, include_external=False,
max_links=50, max_links=50,
@@ -208,7 +208,7 @@ def test_config_examples():
}, },
{ {
"name": "External Links with Patterns", "name": "External Links with Patterns",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=False, include_internal=False,
include_external=True, include_external=True,
include_patterns=["*github.com*", "*stackoverflow.com*"], include_patterns=["*github.com*", "*stackoverflow.com*"],
@@ -218,7 +218,7 @@ def test_config_examples():
}, },
{ {
"name": "High-Performance Mode", "name": "High-Performance Mode",
"config": LinkExtractionConfig( "config": LinkPreviewConfig(
include_internal=True, include_internal=True,
include_external=False, include_external=False,
concurrency=20, concurrency=20,
@@ -237,9 +237,9 @@ def test_config_examples():
print(f" {key}: {value}") print(f" {key}: {value}")
print(" Usage:") print(" Usage:")
print(" from crawl4ai.async_configs import LinkExtractionConfig") print(" from crawl4ai.async_configs import LinkPreviewConfig")
print(" config = CrawlerRunConfig(") print(" config = CrawlerRunConfig(")
print(" link_extraction_config=LinkExtractionConfig(") print(" link_preview_config=LinkPreviewConfig(")
for key, value in config_dict.items(): for key, value in config_dict.items():
if isinstance(value, str): if isinstance(value, str):
print(f" {key}='{value}',") print(f" {key}='{value}',")