This commit is contained in:
@@ -448,6 +448,7 @@ class AsyncWebCrawler:
|
|||||||
pdf_data=pdf_data,
|
pdf_data=pdf_data,
|
||||||
verbose=config.verbose,
|
verbose=config.verbose,
|
||||||
is_raw_html=True if url.startswith("raw:") else False,
|
is_raw_html=True if url.startswith("raw:") else False,
|
||||||
|
redirected_url=async_response.redirected_url,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -596,7 +597,7 @@ class AsyncWebCrawler:
|
|||||||
markdown_result: MarkdownGenerationResult = (
|
markdown_result: MarkdownGenerationResult = (
|
||||||
markdown_generator.generate_markdown(
|
markdown_generator.generate_markdown(
|
||||||
cleaned_html=cleaned_html,
|
cleaned_html=cleaned_html,
|
||||||
base_url=url,
|
base_url=params.get("redirected_url", url),
|
||||||
# html2text_options=kwargs.get('html2text', {})
|
# html2text_options=kwargs.get('html2text', {})
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -128,7 +128,8 @@ class WebScrapingStrategy(ContentScrapingStrategy):
|
|||||||
Returns:
|
Returns:
|
||||||
ScrapingResult: A structured result containing the scraped content.
|
ScrapingResult: A structured result containing the scraped content.
|
||||||
"""
|
"""
|
||||||
raw_result = self._scrap(url, html, is_async=False, **kwargs)
|
actual_url = kwargs.get("redirected_url", url)
|
||||||
|
raw_result = self._scrap(actual_url, html, is_async=False, **kwargs)
|
||||||
if raw_result is None:
|
if raw_result is None:
|
||||||
return ScrapingResult(
|
return ScrapingResult(
|
||||||
cleaned_html="",
|
cleaned_html="",
|
||||||
|
|||||||
Reference in New Issue
Block a user