This commit is contained in:
@@ -448,6 +448,7 @@ class AsyncWebCrawler:
|
||||
pdf_data=pdf_data,
|
||||
verbose=config.verbose,
|
||||
is_raw_html=True if url.startswith("raw:") else False,
|
||||
redirected_url=async_response.redirected_url,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -596,7 +597,7 @@ class AsyncWebCrawler:
|
||||
markdown_result: MarkdownGenerationResult = (
|
||||
markdown_generator.generate_markdown(
|
||||
cleaned_html=cleaned_html,
|
||||
base_url=url,
|
||||
base_url=params.get("redirected_url", url),
|
||||
# html2text_options=kwargs.get('html2text', {})
|
||||
)
|
||||
)
|
||||
|
||||
@@ -128,7 +128,8 @@ class WebScrapingStrategy(ContentScrapingStrategy):
|
||||
Returns:
|
||||
ScrapingResult: A structured result containing the scraped content.
|
||||
"""
|
||||
raw_result = self._scrap(url, html, is_async=False, **kwargs)
|
||||
actual_url = kwargs.get("redirected_url", url)
|
||||
raw_result = self._scrap(actual_url, html, is_async=False, **kwargs)
|
||||
if raw_result is None:
|
||||
return ScrapingResult(
|
||||
cleaned_html="",
|
||||
|
||||
Reference in New Issue
Block a user