diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 6294e2f4..88d94a46 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -466,9 +466,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): console_messages=captured_console, ) - elif url.startswith("raw:") or url.startswith("raw://"): + elif url.startswith("raw:"): # Process raw HTML content - raw_html = url[4:] if url[:4] == "raw:" else url[7:] + raw_html = url[6:] if url.startswith("raw://") else url[4:] html = raw_html if config.screenshot: screenshot_data = await self._generate_screenshot_from_html(html) diff --git a/tests/general/test_async_crawler_strategy.py b/tests/general/test_async_crawler_strategy.py index 68fe4a88..8426fe0a 100644 --- a/tests/general/test_async_crawler_strategy.py +++ b/tests/general/test_async_crawler_strategy.py @@ -15,6 +15,24 @@ CRAWL4AI_HOME_DIR = Path(os.path.expanduser("~")).joinpath(".crawl4ai") if not CRAWL4AI_HOME_DIR.joinpath("profiles", "test_profile").exists(): CRAWL4AI_HOME_DIR.joinpath("profiles", "test_profile").mkdir(parents=True) +@pytest.fixture +def basic_html(): + return """ + + + Basic HTML + + +

Main Heading

+
+
+

Basic HTML document for testing purposes.

+
+
+ + + """ + # Test Config Files @pytest.fixture def basic_browser_config(): @@ -325,6 +343,13 @@ async def test_stealth_mode(crawler_strategy): ) assert response.status_code == 200 +@pytest.mark.asyncio +@pytest.mark.parametrize("prefix", ("raw:", "raw://")) +async def test_raw_urls(crawler_strategy, basic_html, prefix): + url = f"{prefix}{basic_html}" + response = await crawler_strategy.crawl(url, CrawlerRunConfig()) + assert response.html == basic_html + # Error Handling Tests @pytest.mark.asyncio async def test_invalid_url():