Merge branch 'main' into 0.3.743

This commit is contained in:
UncleCode
2024-11-28 12:53:30 +08:00
2 changed files with 13 additions and 2 deletions

View File

@@ -110,7 +110,15 @@ For contributors who plan to modify the source code:
```bash
git clone https://github.com/unclecode/crawl4ai.git
cd crawl4ai
pip install -e .
pip install -e . # Basic installation in editable mode
```
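Install optional features by picking the extras you need (each command below is an alternative; `.[all]` installs every optional feature):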
```bash
pip install -e ".[torch]" # With PyTorch features
pip install -e ".[transformer]" # With Transformer features
pip install -e ".[cosine]" # With cosine similarity features
pip install -e ".[sync]" # With synchronous crawling (Selenium)
pip install -e ".[all]" # Install all optional features
```
## One-Click Deployment 🚀

View File

@@ -96,6 +96,7 @@ class DefaultMarkdownGenerator(MarkdownGenerationStrategy):
)
# Generate fit markdown if content filter is provided
<<<<<<< HEAD
fit_markdown: Optional[str] = ""
filtered_html: Optional[str] = ""
if content_filter or self.content_filter:
@@ -103,13 +104,15 @@ class DefaultMarkdownGenerator(MarkdownGenerationStrategy):
filtered_html = content_filter.filter_content(cleaned_html)
filtered_html = '\n'.join('<div>{}</div>'.format(s) for s in filtered_html)
fit_markdown = h.handle(filtered_html)
>>>>>>> origin/main
fit_markdown = h.handle(filtered_html)
return MarkdownGenerationResult(
raw_markdown=raw_markdown,
markdown_with_citations=markdown_with_citations,
references_markdown=references_markdown,
fit_markdown=fit_markdown,
fit_html=filtered_html
fit_html=filtered_html,
)
def fast_urljoin(base: str, url: str) -> str: