From aadab30c3dc8f5d92aa754ff2dc03ce0d9260621 Mon Sep 17 00:00:00 2001
From: ntohidi <nasrin@kidocode.com>
Date: Mon, 13 Oct 2025 13:08:47 +0800
Subject: [PATCH] fix(docs): clarify Docker Hooks System with function-based
 API in README

---
 README.md | 56 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 25 deletions(-)
diff --git a/README.md b/README.md
index 58d4bf4c..ef0002e1 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ Crawl4AI turns the web into clean, LLM ready Markdown for RAG, agents, and data
 
 [✨ Check out latest update v0.7.5](#-recent-updates)
 
-✨ New in v0.7.5: Docker Hooks System for pipeline customization, Enhanced LLM Integration with custom providers, HTTPS Preservation, and multiple community-reported bug fixes. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.5.md)
+✨ New in v0.7.5: Docker Hooks System with function-based API for pipeline customization, Enhanced LLM Integration with custom providers, HTTPS Preservation, and multiple community-reported bug fixes. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.5.md)
 
 ✨ Recent v0.7.4: Revolutionary LLM Table Extraction with intelligent chunking, enhanced concurrency fixes, memory management refactor, and critical stability improvements. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
 
@@ -179,7 +179,7 @@ No rate-limited APIs. No lock-in. Build and own your data pipeline with direct g
 - 📸 **Screenshots**: Capture page screenshots during crawling for debugging or analysis.
 - 📂 **Raw Data Crawling**: Directly process raw HTML (`raw:`) or local files (`file://`).
 - 🔗 **Comprehensive Link Extraction**: Extracts internal, external links, and embedded iframe content.
-- 🛠️ **Customizable Hooks**: Define hooks at every step to customize crawling behavior.
+- 🛠️ **Customizable Hooks**: Define hooks at every step to customize crawling behavior (supports both string-based and function-based APIs).
 - 💾 **Caching**: Cache data for improved speed and to avoid redundant fetches.
 - 📄 **Metadata Extraction**: Retrieve structured metadata from web pages.
 - 📡 **IFrame Content Extraction**: Seamless extraction from embedded iframe content.
@@ -549,34 +549,40 @@ async def test_news_crawl():
 <details>
 <summary><strong>Version 0.7.5 Release Highlights - The Docker Hooks & Security Update</strong></summary>
 
-- **🔧 Docker Hooks System**: Complete pipeline customization with user-provided Python functions:
+- **🔧 Docker Hooks System**: Complete pipeline customization with user-provided Python functions at 8 key points
+- **✨ Function-Based Hooks API (NEW)**: Write hooks as regular Python functions with full IDE support:
   ```python
-  import requests
+  from crawl4ai import hooks_to_string
+  from crawl4ai.docker_client import Crawl4aiDockerClient
 
-  # Real working hooks for httpbin.org
-  hooks_config = {
-      "on_page_context_created": """
-  async def hook(page, context, **kwargs):
-      print("Hook: Setting up page context")
-      # Block images to speed up crawling
+  # Define hooks as regular Python functions
+  async def on_page_context_created(page, context, **kwargs):
+      """Block images to speed up crawling"""
       await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
+      await page.set_viewport_size({"width": 1920, "height": 1080})
       return page
-  """,
-      "before_goto": """
-  async def hook(page, context, url, **kwargs):
-      print(f"Hook: About to navigate to {url}")
-      # Add custom headers
-      await page.set_extra_http_headers({'X-Test-Header': 'crawl4ai-hooks-test'})
-      return page
-  """
-  }
 
-  # Test with Docker API
-  payload = {
-      "urls": ["https://httpbin.org/html"],
-      "hooks": {"code": hooks_config, "timeout": 30}
-  }
-  response = requests.post("http://localhost:11235/crawl", json=payload)
+  async def before_goto(page, context, url, **kwargs):
+      """Add custom headers"""
+      await page.set_extra_http_headers({'X-Crawl4AI': 'v0.7.5'})
+      return page
+
+  # Option 1: Use the hooks_to_string() utility to convert hooks for the REST API
+  hooks_code = hooks_to_string({
+      "on_page_context_created": on_page_context_created,
+      "before_goto": before_goto
+  })
+
+  # Option 2: Use the Docker client, which converts hooks automatically (Recommended)
+  client = Crawl4aiDockerClient(base_url="http://localhost:11235")
+  results = await client.crawl(
+      urls=["https://httpbin.org/html"],
+      hooks={
+          "on_page_context_created": on_page_context_created,
+          "before_goto": before_goto
+      }
+  )
+  # ✓ Full IDE support, type checking, and reusability!
   ```
 
 - **🤖 Enhanced LLM Integration**: Custom providers with temperature control and base_url configuration