diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 98111e4b..91b98d7f 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -448,6 +448,7 @@ class AsyncWebCrawler: pdf_data=pdf_data, verbose=config.verbose, is_raw_html=True if url.startswith("raw:") else False, + redirected_url=async_response.redirected_url, **kwargs, ) @@ -596,7 +597,7 @@ class AsyncWebCrawler: markdown_result: MarkdownGenerationResult = ( markdown_generator.generate_markdown( cleaned_html=cleaned_html, - base_url=url, + base_url=params.get("redirected_url", url), # html2text_options=kwargs.get('html2text', {}) ) ) diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py index 215e7cda..0848d655 100644 --- a/crawl4ai/content_scraping_strategy.py +++ b/crawl4ai/content_scraping_strategy.py @@ -128,7 +128,8 @@ class WebScrapingStrategy(ContentScrapingStrategy): Returns: ScrapingResult: A structured result containing the scraped content. """ - raw_result = self._scrap(url, html, is_async=False, **kwargs) + actual_url = kwargs.get("redirected_url", url) + raw_result = self._scrap(actual_url, html, is_async=False, **kwargs) if raw_result is None: return ScrapingResult( cleaned_html="",