diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 430e26a0..3aa7701a 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -398,7 +398,7 @@ class AsyncWebCrawler: html=html, extracted_content=extracted_content, config=config, # Pass the config object instead of individual parameters - screenshot=screenshot_data, + screenshot_data=screenshot_data, pdf_data=pdf_data, verbose=config.verbose, is_raw_html=True if url.startswith("raw:") else False, @@ -482,7 +482,7 @@ class AsyncWebCrawler: html: str, extracted_content: str, config: CrawlerRunConfig, - screenshot: str, + screenshot_data: str, pdf_data: str, verbose: bool, **kwargs, @@ -495,7 +495,7 @@ class AsyncWebCrawler: html: Raw HTML content extracted_content: Previously extracted content (if any) config: Configuration object controlling processing behavior - screenshot: Screenshot data (if any) + screenshot_data: Screenshot data (if any) pdf_data: PDF data (if any) verbose: Whether to enable verbose logging **kwargs: Additional parameters for backwards compatibility @@ -620,10 +620,6 @@ class AsyncWebCrawler: params={"url": _url, "timing": time.perf_counter() - t1}, ) - # Handle screenshot and PDF data - screenshot_data = None if not screenshot else screenshot - pdf_data = None if not pdf_data else pdf_data - # Apply HTML formatting if requested if config.prettiify: cleaned_html = fast_format_html(cleaned_html) diff --git a/docs/examples/full_page_screenshot_and_pdf_export.md b/docs/examples/full_page_screenshot_and_pdf_export.md index 8522675c..bf11f8db 100644 --- a/docs/examples/full_page_screenshot_and_pdf_export.md +++ b/docs/examples/full_page_screenshot_and_pdf_export.md @@ -12,9 +12,10 @@ We’ve introduced a new feature that effortlessly handles even the biggest page **Simple Example:** ```python -import os, sys +import os +import sys import asyncio -from crawl4ai import AsyncWebCrawler, CacheMode +from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig
# Adjust paths as needed parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -26,9 +27,11 @@ async def main(): # Request both PDF and screenshot result = await crawler.arun( url='https://en.wikipedia.org/wiki/List_of_common_misconceptions', - cache_mode=CacheMode.BYPASS, - pdf=True, - screenshot=True + config=CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + pdf=True, + screenshot=True + ) ) if result.success: @@ -40,9 +43,8 @@ async def main(): # Save PDF if result.pdf: - pdf_bytes = b64decode(result.pdf) with open(os.path.join(__location__, "page.pdf"), "wb") as f: - f.write(pdf_bytes) + f.write(result.pdf) if __name__ == "__main__": asyncio.run(main())