Merge pull request #1447 from rbushri/fix/wrong_url_raw

Fix: wrong URL variable passed to the extraction strategy for raw HTML input
2025-11-25 17:49:44 +08:00
parent 84bfea8bd1 7771ed3894
commit 94c8a833bf
2 changed files with 83 additions and 4 deletions
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -617,17 +617,17 @@ class AsyncWebCrawler:
                else config.chunking_strategy
            )
            sections = chunking.chunk(content)
-            # extracted_content = config.extraction_strategy.run(url, sections)
+            # extracted_content = config.extraction_strategy.run(_url, sections)

            # Use async version if available for better parallelism
            if hasattr(config.extraction_strategy, 'arun'):
-                extracted_content = await config.extraction_strategy.arun(url, sections)
+                extracted_content = await config.extraction_strategy.arun(_url, sections)
            else:
                # Fallback to sync version run in thread pool to avoid blocking
                extracted_content = await asyncio.to_thread(
                    config.extraction_strategy.run, url, sections
                )
-
+                
            extracted_content = json.dumps(
                extracted_content, indent=4, default=str, ensure_ascii=False
            )