2025 feb alpha 1 (#685)
* spelling change in prompt * gpt-4o-mini support * Remove leading Y before here * prompt spell correction * (Docs) Fix numbered list end-of-line formatting Added the missing "two spaces" to add a line break * fix: access downloads_path through browser_config in _handle_download method - Fixes #585 * crawl * fix: https://github.com/unclecode/crawl4ai/issues/592 * fix: https://github.com/unclecode/crawl4ai/issues/583 * Docs update: https://github.com/unclecode/crawl4ai/issues/649 * fix: https://github.com/unclecode/crawl4ai/issues/570 * Docs: updated example for content-selection to reflect new changes in yc newsfeed css * Refactor: Removed old filters and replaced with optimised filters * fix:Fixed imports as per the new names of filters * Tests: For deep crawl filters * Refactor: Remove old scorers and replace with optimised ones: Fix imports forall filters and scorers. * fix: awaiting on filters that are async in nature eg: content relevance and seo filters * fix: https://github.com/unclecode/crawl4ai/issues/592 * fix: https://github.com/unclecode/crawl4ai/issues/715 --------- Co-authored-by: DarshanTank <darshan.tank@gnani.ai> Co-authored-by: Tuhin Mallick <tuhin.mllk@gmail.com> Co-authored-by: Serhat Soydan <ssoydan@gmail.com> Co-authored-by: cardit1 <maneesh@cardit.in> Co-authored-by: Tautik Agrahari <tautikagrahari@gmail.com>
This commit is contained in:
@@ -17,11 +17,16 @@ from .extraction_strategy import (
|
||||
LLMExtractionStrategy,
|
||||
CosineStrategy,
|
||||
JsonCssExtractionStrategy,
|
||||
JsonXPathExtractionStrategy
|
||||
JsonXPathExtractionStrategy,
|
||||
)
|
||||
from .chunking_strategy import ChunkingStrategy, RegexChunking
|
||||
from .markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from .content_filter_strategy import PruningContentFilter, BM25ContentFilter, LLMContentFilter, RelevantContentFilter
|
||||
from .content_filter_strategy import (
|
||||
PruningContentFilter,
|
||||
BM25ContentFilter,
|
||||
LLMContentFilter,
|
||||
RelevantContentFilter,
|
||||
)
|
||||
from .models import CrawlResult, MarkdownGenerationResult
|
||||
from .async_dispatcher import (
|
||||
MemoryAdaptiveDispatcher,
|
||||
@@ -29,20 +34,25 @@ from .async_dispatcher import (
|
||||
RateLimiter,
|
||||
CrawlerMonitor,
|
||||
DisplayMode,
|
||||
BaseDispatcher
|
||||
BaseDispatcher,
|
||||
)
|
||||
from .docker_client import Crawl4aiDockerClient
|
||||
from .hub import CrawlerHub
|
||||
from .deep_crawling import (
|
||||
DeepCrawlStrategy,
|
||||
BFSDeepCrawlStrategy,
|
||||
FastFilterChain,
|
||||
FastContentTypeFilter,
|
||||
FastDomainFilter,
|
||||
FastURLFilter,
|
||||
FastFilterStats,
|
||||
FastKeywordRelevanceScorer,
|
||||
FastURLScorer,
|
||||
FilterChain,
|
||||
ContentTypeFilter,
|
||||
DomainFilter,
|
||||
URLFilter,
|
||||
FilterStats,
|
||||
SEOFilter,
|
||||
KeywordRelevanceScorer,
|
||||
URLScorer,
|
||||
CompositeScorer,
|
||||
DomainAuthorityScorer,
|
||||
FreshnessScorer,
|
||||
PathDepthScorer,
|
||||
BestFirstCrawlingStrategy,
|
||||
DFSDeepCrawlStrategy,
|
||||
DeepCrawlDecorator,
|
||||
@@ -54,13 +64,18 @@ __all__ = [
|
||||
"BFSDeepCrawlStrategy",
|
||||
"BestFirstCrawlingStrategy",
|
||||
"DFSDeepCrawlStrategy",
|
||||
"FastFilterChain",
|
||||
"FastContentTypeFilter",
|
||||
"FastDomainFilter",
|
||||
"FastFilterStats",
|
||||
"FastURLFilter",
|
||||
"FastKeywordRelevanceScorer",
|
||||
"FastURLScorer",
|
||||
"FilterChain",
|
||||
"ContentTypeFilter",
|
||||
"DomainFilter",
|
||||
"FilterStats",
|
||||
"URLFilter",
|
||||
"SEOFilter",
|
||||
"KeywordRelevanceScorer",
|
||||
"URLScorer",
|
||||
"CompositeScorer",
|
||||
"DomainAuthorityScorer",
|
||||
"FreshnessScorer",
|
||||
"PathDepthScorer",
|
||||
"DeepCrawlDecorator",
|
||||
"CrawlResult",
|
||||
"CrawlerHub",
|
||||
|
||||
Reference in New Issue
Block a user