feat(docs): update examples and documentation to replace bypass_cache with cache_mode for improved clarity

This commit is contained in:
UncleCode
2024-11-17 19:44:45 +08:00
parent a59c107b23
commit df63a40606
17 changed files with 422 additions and 80 deletions

View File

@@ -10,14 +10,14 @@ import asyncio
from .models import CrawlResult
from .async_database import async_db_manager
from .chunking_strategy import *
from .content_filter_strategy import *
from .extraction_strategy import *
from .async_crawler_strategy import AsyncCrawlerStrategy, AsyncPlaywrightCrawlerStrategy, AsyncCrawlResponse
from .cache_context import CacheMode, CacheContext, _legacy_to_cache_mode
from .content_scrapping_strategy import WebScrapingStrategy
from .config import (
MIN_WORD_THRESHOLD,
IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
SHOW_DEPRECATION_WARNINGS # New import
IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD
)
from .utils import (
sanitize_input_encode,
@@ -49,7 +49,7 @@ class AsyncWebCrawler:
)
To disable deprecation warnings:
Set SHOW_DEPRECATION_WARNINGS = False in config.py
Pass warning=False to suppress the warning.
"""
def __init__(
@@ -85,11 +85,11 @@ class AsyncWebCrawler:
# Handle deprecated parameter
if always_by_pass_cache is not None:
if SHOW_DEPRECATION_WARNINGS:
if kwargs.get("warning", True):
warnings.warn(
"'always_by_pass_cache' is deprecated and will be removed in version X.X.X. "
"Use 'always_bypass_cache' instead. "
"Set SHOW_DEPRECATION_WARNINGS=False in config.py to suppress this warning.",
"Pass warning=False to suppress this warning.",
DeprecationWarning,
stacklevel=2
)
@@ -126,6 +126,7 @@ class AsyncWebCrawler:
word_count_threshold=MIN_WORD_THRESHOLD,
extraction_strategy: ExtractionStrategy = None,
chunking_strategy: ChunkingStrategy = RegexChunking(),
content_filter: RelevantContentFilter = None,
cache_mode: Optional[CacheMode] = None,
# Deprecated parameters
bypass_cache: bool = False,
@@ -172,7 +173,7 @@ class AsyncWebCrawler:
try:
# Handle deprecated parameters
if any([bypass_cache, disable_cache, no_cache_read, no_cache_write]):
if SHOW_DEPRECATION_WARNINGS:
if kwargs.get("warning", True):
warnings.warn(
"Cache control boolean flags are deprecated and will be removed in version X.X.X. "
"Use 'cache_mode' parameter instead. Examples:\n"
@@ -180,7 +181,7 @@ class AsyncWebCrawler:
"- For disable_cache=True, use cache_mode=CacheMode.DISABLED\n"
"- For no_cache_read=True, use cache_mode=CacheMode.WRITE_ONLY\n"
"- For no_cache_write=True, use cache_mode=CacheMode.READ_ONLY\n"
"Set SHOW_DEPRECATION_WARNINGS=False in config.py to suppress this warning.",
"Pass warning=False to suppress this warning.",
DeprecationWarning,
stacklevel=2
)
@@ -257,6 +258,7 @@ class AsyncWebCrawler:
word_count_threshold=word_count_threshold,
extraction_strategy=extraction_strategy,
chunking_strategy=chunking_strategy,
content_filter=content_filter,
css_selector=css_selector,
screenshot=screenshot_data,
verbose=verbose,
@@ -308,6 +310,7 @@ class AsyncWebCrawler:
word_count_threshold=MIN_WORD_THRESHOLD,
extraction_strategy: ExtractionStrategy = None,
chunking_strategy: ChunkingStrategy = RegexChunking(),
content_filter: RelevantContentFilter = None,
cache_mode: Optional[CacheMode] = None,
# Deprecated parameters
bypass_cache: bool = False,
@@ -335,14 +338,15 @@ class AsyncWebCrawler:
Returns:
List[CrawlResult]: Results for each URL
"""
if bypass_cache and SHOW_DEPRECATION_WARNINGS:
warnings.warn(
"'bypass_cache' is deprecated and will be removed in version X.X.X. "
"Use 'cache_mode=CacheMode.BYPASS' instead. "
"Set SHOW_DEPRECATION_WARNINGS=False in config.py to suppress this warning.",
DeprecationWarning,
stacklevel=2
)
if bypass_cache:
if kwargs.get("warning", True):
warnings.warn(
"'bypass_cache' is deprecated and will be removed in version X.X.X. "
"Use 'cache_mode=CacheMode.BYPASS' instead. "
"Pass warning=False to suppress this warning.",
DeprecationWarning,
stacklevel=2
)
if cache_mode is None:
cache_mode = CacheMode.BYPASS
@@ -356,6 +360,7 @@ class AsyncWebCrawler:
word_count_threshold=word_count_threshold,
extraction_strategy=extraction_strategy,
chunking_strategy=chunking_strategy,
content_filter=content_filter,
cache_mode=cache_mode,
css_selector=css_selector,
screenshot=screenshot,
@@ -377,6 +382,7 @@ class AsyncWebCrawler:
word_count_threshold: int,
extraction_strategy: ExtractionStrategy,
chunking_strategy: ChunkingStrategy,
content_filter: RelevantContentFilter,
css_selector: str,
screenshot: str,
verbose: bool,
@@ -397,6 +403,7 @@ class AsyncWebCrawler:
image_description_min_word_threshold=kwargs.get(
"image_description_min_word_threshold", IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD
),
content_filter = content_filter,
**kwargs,
)