From b7a6e02236f9da30c1bb21b8a5bb3dab86d97233 Mon Sep 17 00:00:00 2001 From: ntohidi Date: Wed, 18 Jun 2025 19:04:32 +0200 Subject: [PATCH] fix: Update pdf and screenshot usage documentation. ref #1230 --- deploy/docker/c4ai-doc-context.md | 29 ++++++++++++++++-------- docs/md_v2/advanced/advanced-features.md | 29 ++++++++++++++++-------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/deploy/docker/c4ai-doc-context.md b/deploy/docker/c4ai-doc-context.md index 6591c265..f8b83088 100644 --- a/deploy/docker/c4ai-doc-context.md +++ b/deploy/docker/c4ai-doc-context.md @@ -5433,29 +5433,38 @@ Sometimes you need a visual record of a page or a PDF “printout.” Crawl4AI c ```python import os, asyncio from base64 import b64decode -from crawl4ai import AsyncWebCrawler, CacheMode +from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig async def main(): + run_config = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + screenshot=True, + pdf=True + ) + async with AsyncWebCrawler() as crawler: result = await crawler.arun( url="https://en.wikipedia.org/wiki/List_of_common_misconceptions", - cache_mode=CacheMode.BYPASS, - pdf=True, - screenshot=True + config=run_config ) - if result.success: - # Save screenshot + print(f"Screenshot data present: {result.screenshot is not None}") + print(f"PDF data present: {result.pdf is not None}") + if result.screenshot: + print(f"[OK] Screenshot captured, size: {len(result.screenshot)} bytes") with open("wikipedia_screenshot.png", "wb") as f: f.write(b64decode(result.screenshot)) - - # Save PDF + else: + print("[WARN] Screenshot data is None.") + if result.pdf: + print(f"[OK] PDF captured, size: {len(result.pdf)} bytes") with open("wikipedia_page.pdf", "wb") as f: f.write(result.pdf) - - print("[OK] PDF & screenshot captured.") + else: + print("[WARN] PDF data is None.") + else: print("[ERROR]", result.error_message) diff --git a/docs/md_v2/advanced/advanced-features.md b/docs/md_v2/advanced/advanced-features.md index b56f216e..3563fd40 100644 --- a/docs/md_v2/advanced/advanced-features.md +++ b/docs/md_v2/advanced/advanced-features.md @@ -66,29 +66,38 @@ Sometimes you need a visual record of a page or a PDF “printout.” Crawl4AI c ```python import os, asyncio from base64 import b64decode -from crawl4ai import AsyncWebCrawler, CacheMode +from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig async def main(): + run_config = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + screenshot=True, + pdf=True + ) + async with AsyncWebCrawler() as crawler: result = await crawler.arun( url="https://en.wikipedia.org/wiki/List_of_common_misconceptions", - cache_mode=CacheMode.BYPASS, - pdf=True, - screenshot=True + config=run_config ) - if result.success: - # Save screenshot + print(f"Screenshot data present: {result.screenshot is not None}") + print(f"PDF data present: {result.pdf is not None}") + if result.screenshot: + print(f"[OK] Screenshot captured, size: {len(result.screenshot)} bytes") with open("wikipedia_screenshot.png", "wb") as f: f.write(b64decode(result.screenshot)) - - # Save PDF + else: + print("[WARN] Screenshot data is None.") + if result.pdf: + print(f"[OK] PDF captured, size: {len(result.pdf)} bytes") with open("wikipedia_page.pdf", "wb") as f: f.write(result.pdf) - - print("[OK] PDF & screenshot captured.") + else: + print("[WARN] PDF data is None.") + else: print("[ERROR]", result.error_message)