Fix: Use correct URL variable for raw HTML extraction (#1116)

- Prevents the full HTML content from being passed as the URL to extraction strategies
- Adds unit tests to verify raw HTML and regular URL processing

Fix: Wrong URL variable used for extraction of raw HTML
2025-08-28 10:46:44 +03:00
parent e651e045c4
commit edd0b576b1
2 changed files with 81 additions and 2 deletions
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -615,7 +615,7 @@ class AsyncWebCrawler:
                else config.chunking_strategy
            )
            sections = chunking.chunk(content)
-            extracted_content = config.extraction_strategy.run(url, sections)
+            extracted_content = config.extraction_strategy.run(_url, sections)
            extracted_content = json.dumps(
                extracted_content, indent=4, default=str, ensure_ascii=False
            )