refactor(config): enhance serialization and config handling

- Add ignore_default_value option to to_serializable_dict
- Add viewport dict support in BrowserConfig
- Replace FastFilterChain with FilterChain
- Add deprecation warnings for unwanted properties
- Clean up unused imports
- Rename example files for consistency
- Add comprehensive Docker configuration tutorial

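BREAKING CHANGE: FastFilterChain has been replaced with FilterChain; code that imports FastFilterChain from crawl4ai.deep_crawling.filters or passes it as filter_chain must switch to FilterChain.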
This commit is contained in:
UncleCode
2025-02-19 17:23:25 +08:00
parent dad592c801
commit 3cb28875c3
7 changed files with 308 additions and 33 deletions

View File

@@ -10,7 +10,7 @@ import inspect
from crawl4ai import CacheMode
from crawl4ai.async_configs import CrawlerRunConfig
from crawl4ai.models import CrawlResult, TraversalStats
-from crawl4ai.deep_crawling.filters import FastFilterChain
+from crawl4ai.deep_crawling.filters import FilterChain
from crawl4ai.async_webcrawler import AsyncWebCrawler
import time
import logging
@@ -313,7 +313,7 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
def __init__(self,
max_depth: int,
-filter_chain: FastFilterChain = FastFilterChain(),
+filter_chain: FilterChain = FilterChain(),
priority_fn: Callable[[str], Awaitable[float]] = lambda url: 1.0,
logger: logging.Logger = None):
self.max_depth = max_depth
@@ -408,7 +408,7 @@ async def main():
strategy = BFSDeepCrawlStrategy(
max_depth=2,
priority_fn=lambda url: 1.0 / (len(url) + 1e-9), # Inverse length priority
-# filter_chain=FastFilterChain(...)
+# filter_chain=FilterChain(...)
)
config: CrawlerRunConfig = CrawlerRunConfig(