Merge branch 'main' into 0.3.743
This commit is contained in:
10
README.md
10
README.md
@@ -110,7 +110,15 @@ For contributors who plan to modify the source code:
|
|||||||
```bash
|
```bash
|
||||||
git clone https://github.com/unclecode/crawl4ai.git
|
git clone https://github.com/unclecode/crawl4ai.git
|
||||||
cd crawl4ai
|
cd crawl4ai
|
||||||
pip install -e .
|
pip install -e . # Basic installation in editable mode
|
||||||
|
```
|
||||||
|
Install optional features:
|
||||||
|
```bash
|
||||||
|
pip install -e ".[torch]" # With PyTorch features
|
||||||
|
pip install -e ".[transformer]" # With Transformer features
|
||||||
|
pip install -e ".[cosine]" # With cosine similarity features
|
||||||
|
pip install -e ".[sync]" # With synchronous crawling (Selenium)
|
||||||
|
pip install -e ".[all]" # Install all optional features
|
||||||
```
|
```
|
||||||
|
|
||||||
## One-Click Deployment 🚀
|
## One-Click Deployment 🚀
|
||||||
|
|||||||
@@ -96,6 +96,7 @@ class DefaultMarkdownGenerator(MarkdownGenerationStrategy):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Generate fit markdown if content filter is provided
|
# Generate fit markdown if content filter is provided
|
||||||
|
<<<<<<< HEAD
|
||||||
fit_markdown: Optional[str] = ""
|
fit_markdown: Optional[str] = ""
|
||||||
filtered_html: Optional[str] = ""
|
filtered_html: Optional[str] = ""
|
||||||
if content_filter or self.content_filter:
|
if content_filter or self.content_filter:
|
||||||
@@ -103,13 +104,15 @@ class DefaultMarkdownGenerator(MarkdownGenerationStrategy):
|
|||||||
filtered_html = content_filter.filter_content(cleaned_html)
|
filtered_html = content_filter.filter_content(cleaned_html)
|
||||||
filtered_html = '\n'.join('<div>{}</div>'.format(s) for s in filtered_html)
|
filtered_html = '\n'.join('<div>{}</div>'.format(s) for s in filtered_html)
|
||||||
fit_markdown = h.handle(filtered_html)
|
fit_markdown = h.handle(filtered_html)
|
||||||
|
>>>>>>> origin/main
|
||||||
|
fit_markdown = h.handle(filtered_html)
|
||||||
|
|
||||||
return MarkdownGenerationResult(
|
return MarkdownGenerationResult(
|
||||||
raw_markdown=raw_markdown,
|
raw_markdown=raw_markdown,
|
||||||
markdown_with_citations=markdown_with_citations,
|
markdown_with_citations=markdown_with_citations,
|
||||||
references_markdown=references_markdown,
|
references_markdown=references_markdown,
|
||||||
fit_markdown=fit_markdown,
|
fit_markdown=fit_markdown,
|
||||||
fit_html=filtered_html
|
fit_html=filtered_html,
|
||||||
)
|
)
|
||||||
|
|
||||||
def fast_urljoin(base: str, url: str) -> str:
|
def fast_urljoin(base: str, url: str) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user