refactor(scraping): replace ScrapingMode enum with strategy pattern
Replace the ScrapingMode enum with a proper strategy pattern implementation for content scraping. This change introduces:

- New ContentScrapingStrategy abstract base class
- Concrete WebScrapingStrategy and LXMLWebScrapingStrategy implementations
- New Pydantic models for structured scraping results
- Updated documentation reflecting the new strategy-based approach

BREAKING CHANGE: The ScrapingMode enum has been removed. Users should now use a ContentScrapingStrategy implementation (WebScrapingStrategy or LXMLWebScrapingStrategy) instead.
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
# __init__.py
|
||||
|
||||
from .async_webcrawler import AsyncWebCrawler, CacheMode
|
||||
from .async_configs import BrowserConfig, CrawlerRunConfig, ScrapingMode
|
||||
from .async_configs import BrowserConfig, CrawlerRunConfig
|
||||
from .content_scraping_strategy import ContentScrapingStrategy, WebScrapingStrategy, LXMLWebScrapingStrategy
|
||||
from .extraction_strategy import ExtractionStrategy, LLMExtractionStrategy, CosineStrategy, JsonCssExtractionStrategy
|
||||
from .chunking_strategy import ChunkingStrategy, RegexChunking
|
||||
from .markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
@@ -14,7 +15,9 @@ __all__ = [
|
||||
"AsyncWebCrawler",
|
||||
"CrawlResult",
|
||||
"CacheMode",
|
||||
"ScrapingMode",
|
||||
"ContentScrapingStrategy",
|
||||
"WebScrapingStrategy",
|
||||
"LXMLWebScrapingStrategy",
|
||||
'BrowserConfig',
|
||||
'CrawlerRunConfig',
|
||||
'ExtractionStrategy',
|
||||
|
||||
Reference in New Issue
Block a user