Aravind Karnam
2025-03-25 15:17:59 +05:30
parent e3111d0a32
commit 585e5e5973
2 changed files with 4 additions and 2 deletions

View File

@@ -448,6 +448,7 @@ class AsyncWebCrawler:
pdf_data=pdf_data,
verbose=config.verbose,
is_raw_html=True if url.startswith("raw:") else False,
redirected_url=async_response.redirected_url,
**kwargs,
)
@@ -596,7 +597,7 @@ class AsyncWebCrawler:
markdown_result: MarkdownGenerationResult = (
markdown_generator.generate_markdown(
cleaned_html=cleaned_html,
base_url=url,
base_url=params.get("redirected_url", url),
# html2text_options=kwargs.get('html2text', {})
)
)

View File

@@ -128,7 +128,8 @@ class WebScrapingStrategy(ContentScrapingStrategy):
Returns:
ScrapingResult: A structured result containing the scraped content.
"""
raw_result = self._scrap(url, html, is_async=False, **kwargs)
actual_url = kwargs.get("redirected_url", url)
raw_result = self._scrap(actual_url, html, is_async=False, **kwargs)
if raw_result is None:
return ScrapingResult(
cleaned_html="",