refactor(core): improve type hints and remove unused file
- Add RelevantContentFilter to __init__.py exports
- Update version to 0.4.3b3
- Enhance type hints in async_configs.py
- Remove empty utils.scraping.py file
- Update mkdocs configuration with version info and GitHub integration

BREAKING CHANGE: None
This commit is contained in:
@@ -16,7 +16,7 @@ from .extraction_strategy import (
|
||||
)
|
||||
from .chunking_strategy import ChunkingStrategy, RegexChunking
|
||||
from .markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from .content_filter_strategy import PruningContentFilter, BM25ContentFilter, LLMContentFilter
|
||||
from .content_filter_strategy import PruningContentFilter, BM25ContentFilter, LLMContentFilter, RelevantContentFilter
|
||||
from .models import CrawlResult, MarkdownGenerationResult
|
||||
from .async_dispatcher import (
|
||||
MemoryAdaptiveDispatcher,
|
||||
@@ -44,6 +44,7 @@ __all__ = [
|
||||
"ChunkingStrategy",
|
||||
"RegexChunking",
|
||||
"DefaultMarkdownGenerator",
|
||||
"RelevantContentFilter",
|
||||
"PruningContentFilter",
|
||||
"BM25ContentFilter",
|
||||
"LLMContentFilter",
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
# crawl4ai/_version.py
|
||||
__version__ = "0.4.3b2"
|
||||
__version__ = "0.4.3b3"
|
||||
|
||||
@@ -6,12 +6,15 @@ from .config import (
|
||||
IMAGE_SCORE_THRESHOLD,
|
||||
SOCIAL_MEDIA_DOMAINS,
|
||||
)
|
||||
|
||||
from .user_agent_generator import UserAgentGenerator
|
||||
from .extraction_strategy import ExtractionStrategy
|
||||
from .chunking_strategy import ChunkingStrategy, RegexChunking
|
||||
from .markdown_generation_strategy import MarkdownGenerationStrategy
|
||||
from .content_filter_strategy import RelevantContentFilter, BM25ContentFilter, LLMContentFilter, PruningContentFilter
|
||||
from .content_scraping_strategy import ContentScrapingStrategy, WebScrapingStrategy
|
||||
from typing import Optional, Union, List
|
||||
from .cache_context import CacheMode
|
||||
|
||||
|
||||
class BrowserConfig:
|
||||
@@ -81,13 +84,13 @@ class BrowserConfig:
|
||||
user_data_dir: str = None,
|
||||
chrome_channel: str = "chromium",
|
||||
channel: str = "chromium",
|
||||
proxy: Optional[str] = None,
|
||||
proxy: str = None,
|
||||
proxy_config: dict = None,
|
||||
viewport_width: int = 1080,
|
||||
viewport_height: int = 600,
|
||||
accept_downloads: bool = False,
|
||||
downloads_path: str = None,
|
||||
storage_state=None,
|
||||
storage_state : Union[str, dict, None]=None,
|
||||
ignore_https_errors: bool = True,
|
||||
java_script_enabled: bool = True,
|
||||
sleep_on_close: bool = False,
|
||||
@@ -382,7 +385,7 @@ class CrawlerRunConfig:
|
||||
extraction_strategy: ExtractionStrategy = None,
|
||||
chunking_strategy: ChunkingStrategy = RegexChunking(),
|
||||
markdown_generator: MarkdownGenerationStrategy = None,
|
||||
content_filter=None,
|
||||
content_filter : RelevantContentFilter = None,
|
||||
only_text: bool = False,
|
||||
css_selector: str = None,
|
||||
excluded_tags: list = None,
|
||||
@@ -396,7 +399,7 @@ class CrawlerRunConfig:
|
||||
# SSL Parameters
|
||||
fetch_ssl_certificate: bool = False,
|
||||
# Caching Parameters
|
||||
cache_mode=None,
|
||||
cache_mode: CacheMode =None,
|
||||
session_id: str = None,
|
||||
bypass_cache: bool = False,
|
||||
disable_cache: bool = False,
|
||||
|
||||
11
mkdocs.yml
11
mkdocs.yml
@@ -1,4 +1,4 @@
|
||||
site_name: Crawl4AI Documentation
|
||||
site_name: Crawl4AI Documentation (v0.4.3b2)
|
||||
site_description: 🚀🤖 Crawl4AI, Open-source LLM-Friendly Web Crawler & Scraper
|
||||
site_url: https://docs.crawl4ai.com
|
||||
repo_url: https://github.com/unclecode/crawl4ai
|
||||
@@ -52,6 +52,11 @@ nav:
|
||||
theme:
|
||||
name: 'terminal'
|
||||
palette: 'dark'
|
||||
icon:
|
||||
repo: fontawesome/brands/github
|
||||
|
||||
plugins:
|
||||
- search
|
||||
|
||||
markdown_extensions:
|
||||
- pymdownx.highlight:
|
||||
@@ -64,6 +69,9 @@ markdown_extensions:
|
||||
- attr_list
|
||||
- tables
|
||||
|
||||
extra:
|
||||
version: !ENV [CRAWL4AI_VERSION, 'development']
|
||||
|
||||
extra_css:
|
||||
- assets/styles.css
|
||||
- assets/highlight.css
|
||||
@@ -72,3 +80,4 @@ extra_css:
|
||||
extra_javascript:
|
||||
- assets/highlight.min.js
|
||||
- assets/highlight_init.js
|
||||
- https://buttons.github.io/buttons.js
|
||||
Reference in New Issue
Block a user