Feature: Add Markdown generation to CrawlerRunConfig
- Added a Markdown generator parameter to `CrawlerRunConfig` in `async_configs.py`.
- Implemented logic for Markdown generation in content scraping in `async_webcrawler.py`.
- Updated the version number to 0.4.21 in `__version__.py`.
This commit is contained in:
@@ -602,16 +602,16 @@ class WebScrapingStrategy(ContentScrapingStrategy):
|
||||
|
||||
cleaned_html = str_body.replace('\n\n', '\n').replace(' ', ' ')
|
||||
|
||||
markdown_content = self._generate_markdown_content(
|
||||
cleaned_html=cleaned_html,
|
||||
html=html,
|
||||
url=url,
|
||||
success=success,
|
||||
**kwargs
|
||||
)
|
||||
# markdown_content = self._generate_markdown_content(
|
||||
# cleaned_html=cleaned_html,
|
||||
# html=html,
|
||||
# url=url,
|
||||
# success=success,
|
||||
# **kwargs
|
||||
# )
|
||||
|
||||
return {
|
||||
**markdown_content,
|
||||
# **markdown_content,
|
||||
'cleaned_html': cleaned_html,
|
||||
'success': success,
|
||||
'media': media,
|
||||
|
||||
Reference in New Issue
Block a user