diff --git a/README.md b/README.md index 6c5e256e..5ba33dea 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,15 @@ For contributors who plan to modify the source code: ```bash git clone https://github.com/unclecode/crawl4ai.git cd crawl4ai -pip install -e . +pip install -e . # Basic installation in editable mode +``` +Install optional features: +```bash +pip install -e ".[torch]" # With PyTorch features +pip install -e ".[transformer]" # With Transformer features +pip install -e ".[cosine]" # With cosine similarity features +pip install -e ".[sync]" # With synchronous crawling (Selenium) +pip install -e ".[all]" # Install all optional features ``` ## One-Click Deployment 🚀 diff --git a/crawl4ai/markdown_generation_strategy.py b/crawl4ai/markdown_generation_strategy.py index b1e43f9d..377f6c84 100644 --- a/crawl4ai/markdown_generation_strategy.py +++ b/crawl4ai/markdown_generation_strategy.py @@ -96,6 +96,7 @@ class DefaultMarkdownGenerator(MarkdownGenerationStrategy): ) # Generate fit markdown if content filter is provided +<<<<<<< HEAD fit_markdown: Optional[str] = "" filtered_html: Optional[str] = "" if content_filter or self.content_filter: @@ -103,13 +104,15 @@ class DefaultMarkdownGenerator(MarkdownGenerationStrategy): filtered_html = content_filter.filter_content(cleaned_html) filtered_html = '\n'.join('<div>{}</div>'.format(s) for s in filtered_html) fit_markdown = h.handle(filtered_html) +>>>>>>> origin/main + fit_markdown = h.handle(filtered_html) return MarkdownGenerationResult( raw_markdown=raw_markdown, markdown_with_citations=markdown_with_citations, references_markdown=references_markdown, fit_markdown=fit_markdown, - fit_html=filtered_html + fit_html=filtered_html, ) def fast_urljoin(base: str, url: str) -> str: