Feature: Add Markdown generation to CrawlerRunConfig

- Added a Markdown generator parameter to `CrawlerRunConfig` in `async_configs.py`.
- Implemented logic for Markdown generation in content scraping in `async_webcrawler.py`.
- Updated the version number to 0.4.21 in `__version__.py`.
This commit is contained in:
UncleCode
2024-12-13 21:51:38 +08:00
parent 7af1d32ef6
commit 7524aa7b5e
5 changed files with 46 additions and 28 deletions

View File

@@ -602,16 +602,16 @@ class WebScrapingStrategy(ContentScrapingStrategy):
cleaned_html = str_body.replace('\n\n', '\n').replace(' ', ' ')
markdown_content = self._generate_markdown_content(
cleaned_html=cleaned_html,
html=html,
url=url,
success=success,
**kwargs
)
# markdown_content = self._generate_markdown_content(
# cleaned_html=cleaned_html,
# html=html,
# url=url,
# success=success,
# **kwargs
# )
return {
**markdown_content,
# **markdown_content,
'cleaned_html': cleaned_html,
'success': success,
'media': media,