docs + fix: Update the example for full-page screenshot & PDF export. Fix the error "crawl4ai.async_webcrawler.AsyncWebCrawler.aprocess_html() got multiple values for keyword argument 'screenshot'" by renaming the `screenshot` parameter of aprocess_html() to `screenshot_data`. https://github.com/unclecode/crawl4ai/issues/822#issuecomment-2732602118

2025-03-18 17:20:24 +05:30
parent 529a79725e
commit 4359b12003
2 changed files with 12 additions and 14 deletions
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -398,7 +398,7 @@ class AsyncWebCrawler:
                        html=html,
                        extracted_content=extracted_content,
                        config=config,  # Pass the config object instead of individual parameters
-                        screenshot=screenshot_data,
+                        screenshot_data=screenshot_data,
                        pdf_data=pdf_data,
                        verbose=config.verbose,
                        is_raw_html=True if url.startswith("raw:") else False,
@@ -482,7 +482,7 @@ class AsyncWebCrawler:
        html: str,
        extracted_content: str,
        config: CrawlerRunConfig,
-        screenshot: str,
+        screenshot_data: str,
        pdf_data: str,
        verbose: bool,
        **kwargs,
@@ -495,7 +495,7 @@ class AsyncWebCrawler:
            html: Raw HTML content
            extracted_content: Previously extracted content (if any)
            config: Configuration object controlling processing behavior
-            screenshot: Screenshot data (if any)
+            screenshot_data: Screenshot data (if any)
            pdf_data: PDF data (if any)
            verbose: Whether to enable verbose logging
            **kwargs: Additional parameters for backwards compatibility
@@ -620,10 +620,6 @@ class AsyncWebCrawler:
                params={"url": _url, "timing": time.perf_counter() - t1},
            )
        # Handle screenshot and PDF data
        screenshot_data = None if not screenshot else screenshot
        pdf_data = None if not pdf_data else pdf_data
        # Apply HTML formatting if requested
        if config.prettiify:
            cleaned_html = fast_format_html(cleaned_html)
--- a/docs/examples/full_page_screenshot_and_pdf_export.md
+++ b/docs/examples/full_page_screenshot_and_pdf_export.md
@@ -12,9 +12,10 @@ We’ve introduced a new feature that effortlessly handles even the biggest page
 **Simple Example:**
 ```python
-import os, sys
+import os
 import sys
 import asyncio
-from crawl4ai import AsyncWebCrawler, CacheMode
+from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig
 # Adjust paths as needed
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -26,9 +27,11 @@ async def main():
        # Request both PDF and screenshot
        result = await crawler.arun(
            url='https://en.wikipedia.org/wiki/List_of_common_misconceptions',
-            cache_mode=CacheMode.BYPASS,
+            config=CrawlerRunConfig(
-            pdf=True,
+                cache_mode=CacheMode.BYPASS,
-            screenshot=True
+                pdf=True,
                screenshot=True
            )
        )
        if result.success:
@@ -40,9 +43,8 @@ async def main():
            # Save PDF
            if result.pdf:
                pdf_bytes = b64decode(result.pdf)
                with open(os.path.join(__location__, "page.pdf"), "wb") as f:
-                    f.write(pdf_bytes)
+                    f.write(result.pdf)
 if __name__ == "__main__":
    asyncio.run(main())