Merge branch 'develop' into release/v0.7.5

This commit is contained in:
ntohidi
2025-10-13 12:34:45 +08:00
16 changed files with 1725 additions and 106 deletions

View File

@@ -103,7 +103,8 @@ from .browser_adapter import (
from .utils import ( from .utils import (
start_colab_display_server, start_colab_display_server,
setup_colab_environment setup_colab_environment,
hooks_to_string
) )
__all__ = [ __all__ = [
@@ -183,6 +184,7 @@ __all__ = [
"ProxyConfig", "ProxyConfig",
"start_colab_display_server", "start_colab_display_server",
"setup_colab_environment", "setup_colab_environment",
"hooks_to_string",
# C4A Script additions # C4A Script additions
"c4a_compile", "c4a_compile",
"c4a_validate", "c4a_validate",

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Union, AsyncGenerator, Dict, Any from typing import List, Optional, Union, AsyncGenerator, Dict, Any, Callable
import httpx import httpx
import json import json
from urllib.parse import urljoin from urllib.parse import urljoin
@@ -7,6 +7,7 @@ import asyncio
from .async_configs import BrowserConfig, CrawlerRunConfig from .async_configs import BrowserConfig, CrawlerRunConfig
from .models import CrawlResult from .models import CrawlResult
from .async_logger import AsyncLogger, LogLevel from .async_logger import AsyncLogger, LogLevel
from .utils import hooks_to_string
class Crawl4aiClientError(Exception): class Crawl4aiClientError(Exception):
@@ -70,17 +71,41 @@ class Crawl4aiDockerClient:
self.logger.error(f"Server unreachable: {str(e)}", tag="ERROR") self.logger.error(f"Server unreachable: {str(e)}", tag="ERROR")
raise ConnectionError(f"Cannot connect to server: {str(e)}") raise ConnectionError(f"Cannot connect to server: {str(e)}")
def _prepare_request(
    self,
    urls: List[str],
    browser_config: Optional[BrowserConfig] = None,
    crawler_config: Optional[CrawlerRunConfig] = None,
    hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None,
    hooks_timeout: int = 30
) -> Dict[str, Any]:
    """Build the JSON payload for a crawl request.

    Also refreshes the ``Authorization`` header on the shared HTTP client
    when a token is set.

    Args:
        urls: URLs to crawl.
        browser_config: Optional browser configuration; dumped into the
            payload, or ``{}`` when omitted.
        crawler_config: Optional crawler configuration; dumped into the
            payload, or ``{}`` when omitted.
        hooks: Optional mapping of hook point name to either a function
            object or already-stringified hook code. Both kinds may be
            mixed in the same mapping.
        hooks_timeout: Timeout value forwarded with the hooks.

    Returns:
        The request payload. A ``"hooks"`` entry is present only when
        ``hooks`` is non-empty.

    Raises:
        ValueError: If the source of a function hook cannot be extracted.
    """
    if self._token:
        self._http_client.headers["Authorization"] = f"Bearer {self._token}"

    request_data = {
        "urls": urls,
        "browser_config": browser_config.dump() if browser_config else {},
        "crawler_config": crawler_config.dump() if crawler_config else {}
    }

    if hooks:
        # Convert entry by entry: function objects are stringified, string
        # hooks pass through untouched. Converting the whole dict at once
        # would reject a mapping that mixes the two kinds.
        hooks_code = {
            name: hooks_to_string({name: hook})[name] if callable(hook) else hook
            for name, hook in hooks.items()
        }
        request_data["hooks"] = {
            "code": hooks_code,
            "timeout": hooks_timeout
        }

    return request_data
async def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response: async def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
"""Make an HTTP request with error handling.""" """Make an HTTP request with error handling."""
url = urljoin(self.base_url, endpoint) url = urljoin(self.base_url, endpoint)
@@ -102,16 +127,42 @@ class Crawl4aiDockerClient:
self, self,
urls: List[str], urls: List[str],
browser_config: Optional[BrowserConfig] = None, browser_config: Optional[BrowserConfig] = None,
crawler_config: Optional[CrawlerRunConfig] = None crawler_config: Optional[CrawlerRunConfig] = None,
hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None,
hooks_timeout: int = 30
) -> Union[CrawlResult, List[CrawlResult], AsyncGenerator[CrawlResult, None]]: ) -> Union[CrawlResult, List[CrawlResult], AsyncGenerator[CrawlResult, None]]:
"""Execute a crawl operation.""" """
Execute a crawl operation.
Args:
urls: List of URLs to crawl
browser_config: Browser configuration
crawler_config: Crawler configuration
hooks: Optional hooks - can be either:
- Dict[str, Callable]: Function objects that will be converted to strings
- Dict[str, str]: Already stringified hook code
hooks_timeout: Timeout in seconds for each hook execution (1-120)
Returns:
Single CrawlResult, list of results, or async generator for streaming
Example with function hooks:
>>> async def my_hook(page, context, **kwargs):
... await page.set_viewport_size({"width": 1920, "height": 1080})
... return page
>>>
>>> result = await client.crawl(
... ["https://example.com"],
... hooks={"on_page_context_created": my_hook}
... )
"""
await self._check_server() await self._check_server()
data = self._prepare_request(urls, browser_config, crawler_config) data = self._prepare_request(urls, browser_config, crawler_config, hooks, hooks_timeout)
is_streaming = crawler_config and crawler_config.stream is_streaming = crawler_config and crawler_config.stream
self.logger.info(f"Crawling {len(urls)} URLs {'(streaming)' if is_streaming else ''}", tag="CRAWL") self.logger.info(f"Crawling {len(urls)} URLs {'(streaming)' if is_streaming else ''}", tag="CRAWL")
if is_streaming: if is_streaming:
async def stream_results() -> AsyncGenerator[CrawlResult, None]: async def stream_results() -> AsyncGenerator[CrawlResult, None]:
async with self._http_client.stream("POST", f"{self.base_url}/crawl/stream", json=data) as response: async with self._http_client.stream("POST", f"{self.base_url}/crawl/stream", json=data) as response:
@@ -128,12 +179,12 @@ class Crawl4aiDockerClient:
else: else:
yield CrawlResult(**result) yield CrawlResult(**result)
return stream_results() return stream_results()
response = await self._request("POST", "/crawl", json=data) response = await self._request("POST", "/crawl", json=data)
result_data = response.json() result_data = response.json()
if not result_data.get("success", False): if not result_data.get("success", False):
raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}") raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}")
results = [CrawlResult(**r) for r in result_data.get("results", [])] results = [CrawlResult(**r) for r in result_data.get("results", [])]
self.logger.success(f"Crawl completed with {len(results)} results", tag="CRAWL") self.logger.success(f"Crawl completed with {len(results)} results", tag="CRAWL")
return results[0] if len(results) == 1 else results return results[0] if len(results) == 1 else results

View File

@@ -47,6 +47,7 @@ from urllib.parse import (
urljoin, urlparse, urlunparse, urljoin, urlparse, urlunparse,
parse_qsl, urlencode, quote, unquote parse_qsl, urlencode, quote, unquote
) )
import inspect
# Monkey patch to fix wildcard handling in urllib.robotparser # Monkey patch to fix wildcard handling in urllib.robotparser
@@ -3529,4 +3530,52 @@ def get_memory_stats() -> Tuple[float, float, float]:
available_gb = get_true_available_memory_gb() available_gb = get_true_available_memory_gb()
used_percent = get_true_memory_usage_percent() used_percent = get_true_memory_usage_percent()
return used_percent, available_gb, total_gb return used_percent, available_gb, total_gb
# Hook utilities for Docker API
def hooks_to_string(hooks: Dict[str, Callable]) -> Dict[str, str]:
    """
    Convert hook function objects to source-code strings for the Docker API.

    Each function's source is read with ``inspect.getsource`` and dedented,
    so functions defined inside a class or another function still yield
    top-level source. Only the function's own source text is captured;
    anything it relies on from the surrounding module is not included.

    Args:
        hooks: Dictionary mapping hook point names to Python function objects.

    Returns:
        Dictionary mapping the same hook point names, in the same order, to
        the dedented source text of each function. An empty input yields an
        empty dictionary.

    Raises:
        ValueError: If a hook is not callable, or if its source cannot be
            extracted (for example a builtin, or a function defined
            interactively). In the latter case the original ``OSError`` or
            ``TypeError`` is attached as ``__cause__``.

    Example:
        >>> async def my_hook(page, context, **kwargs):
        ...     await page.set_viewport_size({"width": 1920, "height": 1080})
        ...     return page
        >>>
        >>> hooks_dict = {"on_page_context_created": my_hook}
        >>> api_hooks = hooks_to_string(hooks_dict)
        >>> # api_hooks is now ready to use with Docker API
    """
    result = {}

    for hook_name, hook_func in hooks.items():
        if not callable(hook_func):
            raise ValueError(f"Hook '{hook_name}' must be a callable function, got {type(hook_func)}")

        try:
            source = inspect.getsource(hook_func)
        except (OSError, TypeError) as e:
            # Chain explicitly so the underlying inspect failure stays
            # visible in the traceback instead of being reported as an
            # unrelated error raised "during handling".
            raise ValueError(
                f"Cannot extract source code for hook '{hook_name}'. "
                f"Make sure the function is defined in a file (not interactively). Error: {e}"
            ) from e

        # Strip common leading indentation so nested definitions become
        # valid top-level source.
        result[hook_name] = textwrap.dedent(source)

    return result

View File

@@ -0,0 +1,522 @@
#!/usr/bin/env python3
"""
Comprehensive hooks examples using Docker Client with function objects.
This approach is recommended because:
- Write hooks as regular Python functions
- Full IDE support (autocomplete, type checking)
- Automatic conversion to API format
- Reusable and testable code
- Clean, readable syntax
"""
import asyncio
from crawl4ai import Crawl4aiDockerClient
# Address of the Crawl4AI Docker server used by every example below.
# Change the port here if your container is mapped to a different host port.
API_BASE_URL = "http://localhost:11235"
# ============================================================================
# Hook Function Definitions
# ============================================================================
# --- All Hooks Demo ---
async def browser_created_hook(browser, **kwargs):
    """Announce that the browser exists and hand it back unchanged."""
    message = "[HOOK] Browser created and ready"
    print(message)
    return browser
async def page_context_hook(page, context, **kwargs):
    """Prepare a new page: viewport size, a session cookie, request blocking."""
    print("[HOOK] Setting up page environment")

    # Desktop-sized viewport
    viewport = {"width": 1920, "height": 1080}
    await page.set_viewport_size(viewport)

    # Session cookie for the test domain
    session_cookie = {
        "name": "test_session",
        "value": "abc123xyz",
        "domain": ".httpbin.org",
        "path": "/",
    }
    await context.add_cookies([session_cookie])

    # Abort image and analytics requests
    for blocked_pattern in ("**/*.{png,jpg,jpeg,gif}", "**/analytics/*"):
        await context.route(blocked_pattern, lambda route: route.abort())

    print("[HOOK] Environment configured")
    return page
async def user_agent_hook(page, context, user_agent, **kwargs):
    """Print the first 50 characters of the user agent and return the page."""
    preview = user_agent[:50]
    print(f"[HOOK] User agent: {preview}...")
    return page
async def before_goto_hook(page, context, url, **kwargs):
    """Log the target URL and attach custom request headers to the page."""
    print(f"[HOOK] Navigating to: {url}")

    extra_headers = {
        "X-Custom-Header": "crawl4ai-test",
        "Accept-Language": "en-US",
    }
    await page.set_extra_http_headers(extra_headers)
    return page
async def after_goto_hook(page, context, url, response, **kwargs):
    """Pause briefly after navigation, then wait (best effort) for ``<body>``.

    Args:
        page: Page object; must provide ``wait_for_timeout`` and
            ``wait_for_selector``.
        context: Browser context (unused here).
        url: URL that was just loaded; only used for logging.
        response: Navigation response (unused here).

    Returns:
        The same ``page`` object.
    """
    print(f"[HOOK] Page loaded: {url}")
    await page.wait_for_timeout(1000)

    try:
        await page.wait_for_selector("body", timeout=2000)
    except Exception:
        # Best effort: a missing or slow <body> must not fail the crawl.
        # Catch Exception rather than using a bare except so that task
        # cancellation (asyncio.CancelledError is a BaseException) still
        # propagates instead of being swallowed here.
        print("[HOOK] Timeout, continuing")
    else:
        print("[HOOK] Body element ready")

    return page
async def execution_started_hook(page, context, **kwargs):
    """Log the start of JS execution and write a marker to the page console."""
    print("[HOOK] JS execution started")
    console_snippet = "console.log('[HOOK] Custom JS');"
    await page.evaluate(console_snippet)
    return page
async def before_retrieve_hook(page, context, **kwargs):
    """Scroll to the bottom and back to the top before the HTML is read."""
    print("[HOOK] Preparing HTML retrieval")

    # Visit the bottom of the page to trigger lazy content, then go back up
    to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
    to_top = "window.scrollTo(0, 0);"
    await page.evaluate(to_bottom)
    await page.wait_for_timeout(500)
    await page.evaluate(to_top)

    print("[HOOK] Scrolling complete")
    return page
async def before_return_hook(page, context, html, **kwargs):
    """Report the HTML length plus image and link counts read from the DOM."""
    print(f"[HOOK] HTML ready: {len(html)} chars")

    # Evaluated in the page; also collects a script count that is not printed
    counts_script = (
        "() => ({\n"
        "images: document.images.length,\n"
        "links: document.links.length,\n"
        "scripts: document.scripts.length\n"
        "})"
    )
    metrics = await page.evaluate(counts_script)

    image_count = metrics['images']
    link_count = metrics['links']
    print(f"[HOOK] Metrics - Images: {image_count}, Links: {link_count}")
    return page
# --- Authentication Hooks ---
async def auth_context_hook(page, context, **kwargs):
    """Install a placeholder auth cookie and two localStorage entries."""
    print("[HOOK] Setting up authentication")

    # Cookie carrying the (fake) token
    auth_cookie = {
        "name": "auth_token",
        "value": "fake_jwt_token",
        "domain": ".httpbin.org",
        "path": "/",
        "httpOnly": True,
    }
    await context.add_cookies([auth_cookie])

    # Values written to localStorage inside the page
    storage_script = (
        "\n"
        "localStorage.setItem('user_id', '12345');\n"
        "localStorage.setItem('auth_time', new Date().toISOString());\n"
    )
    await page.evaluate(storage_script)

    print("[HOOK] Auth context ready")
    return page
async def auth_headers_hook(page, context, url, **kwargs):
    """Attach HTTP Basic credentials and an API key header to the page."""
    print(f"[HOOK] Adding auth headers for {url}")

    # NOTE(review): presumably imported locally because hooks are sent to the
    # server as this function's source text alone, so a module-level import
    # would not travel with it. Keep the import inside the function.
    import base64

    encoded = base64.b64encode(b"user:passwd")
    credentials = encoded.decode('ascii')

    auth_headers = {
        'Authorization': f'Basic {credentials}',
        'X-API-Key': 'test-key-123',
    }
    await page.set_extra_http_headers(auth_headers)
    return page
# --- Performance Optimization Hooks ---
async def performance_hook(page, context, **kwargs):
    """Abort heavy or tracking requests and switch off CSS animations."""
    print("[HOOK] Optimizing for performance")

    # Images, fonts, media, then tracker domains, registered in this order
    blocked_patterns = (
        "**/*.{png,jpg,jpeg,gif,webp,svg}",
        "**/*.{woff,woff2,ttf}",
        "**/*.{mp4,webm,ogg}",
        "**/googletagmanager.com/*",
        "**/google-analytics.com/*",
        "**/facebook.com/*",
    )
    for pattern in blocked_patterns:
        await context.route(pattern, lambda r: r.abort())

    # Zero-length animations and transitions
    no_animation_css = (
        "\n"
        "*, *::before, *::after {\n"
        "animation-duration: 0s !important;\n"
        "transition-duration: 0s !important;\n"
        "}\n"
    )
    await page.add_style_tag(content=no_animation_css)

    print("[HOOK] Optimizations applied")
    return page
async def cleanup_hook(page, context, **kwargs):
    """Remove ad/popup/banner elements and all script and style tags."""
    print("[HOOK] Cleaning page")

    # Runs inside the page and deletes every element matching the selectors
    removal_script = (
        "() => {\n"
        "const selectors = [\n"
        "'.ad', '.ads', '.advertisement',\n"
        "'.popup', '.modal', '.overlay',\n"
        "'.cookie-banner', '.newsletter'\n"
        "];\n"
        "selectors.forEach(sel => {\n"
        "document.querySelectorAll(sel).forEach(el => el.remove());\n"
        "});\n"
        "document.querySelectorAll('script, style').forEach(el => el.remove());\n"
        "}"
    )
    await page.evaluate(removal_script)

    print("[HOOK] Page cleaned")
    return page
# --- Content Extraction Hooks ---
async def wait_dynamic_content_hook(page, context, url, response, **kwargs):
    """Wait for late-loading content and click a "Load More" control if found.

    Args:
        page: Page object; must provide ``wait_for_timeout`` and
            ``query_selector``.
        context: Browser context (unused here).
        url: URL that was loaded; only used for logging.
        response: Navigation response (unused here).

    Returns:
        The same ``page`` object.
    """
    print(f"[HOOK] Waiting for dynamic content on {url}")
    await page.wait_for_timeout(2000)

    # Click "Load More" if exists
    try:
        load_more = await page.query_selector('[class*="load-more"], button:has-text("Load More")')
        if load_more:
            await load_more.click()
            await page.wait_for_timeout(1000)
            print("[HOOK] Clicked 'Load More'")
    except Exception as exc:
        # Still best effort, but no longer silent, and no longer a bare
        # except: task cancellation (a BaseException) now propagates.
        print(f"[HOOK] 'Load More' step skipped: {exc}")

    return page
async def extract_metadata_hook(page, context, **kwargs):
    """Print title/description/author/keywords, then scroll down three times."""
    print("[HOOK] Extracting metadata")

    # Reads <meta name=...> / <meta property=...> values inside the page
    metadata_script = (
        "() => {\n"
        "const getMeta = (name) => {\n"
        'const el = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);\n'
        "return el ? el.getAttribute('content') : null;\n"
        "};\n"
        "return {\n"
        "title: document.title,\n"
        "description: getMeta('description'),\n"
        "author: getMeta('author'),\n"
        "keywords: getMeta('keywords'),\n"
        "};\n"
        "}"
    )
    metadata = await page.evaluate(metadata_script)
    print(f"[HOOK] Metadata: {metadata}")

    # Infinite scroll: three passes with a one-second pause after each
    for scroll_number in range(1, 4):
        await page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
        await page.wait_for_timeout(1000)
        print(f"[HOOK] Scroll {scroll_number}/3")

    return page
# --- Multi-URL Hooks ---
async def url_specific_hook(page, context, url, **kwargs):
    """Set an ``X-Type`` header when the URL contains 'html' or 'json'."""
    print(f"[HOOK] Processing URL: {url}")

    # 'html' takes precedence over 'json'; other URLs get no extra header
    if 'html' in url:
        content_kind = "HTML"
    elif 'json' in url:
        content_kind = "JSON"
    else:
        content_kind = None

    if content_kind is not None:
        await page.set_extra_http_headers({"X-Type": content_kind})

    return page
async def track_progress_hook(page, context, url, response, **kwargs):
    """Print the loaded URL with its response status ('unknown' if absent)."""
    if response:
        status = response.status
    else:
        status = 'unknown'
    print(f"[HOOK] Loaded {url} - Status: {status}")
    return page
# ============================================================================
# Test Functions
# ============================================================================
async def test_all_hooks_comprehensive():
    """Crawl one page with a function registered at each of the 8 hook points."""
    divider = "=" * 70
    print(divider)
    print("Test 1: All Hooks Comprehensive Demo (Docker Client)")
    print(divider)

    # Hook point name -> function object, passed as-is to the client
    hooks = dict(
        on_browser_created=browser_created_hook,
        on_page_context_created=page_context_hook,
        on_user_agent_updated=user_agent_hook,
        before_goto=before_goto_hook,
        after_goto=after_goto_hook,
        on_execution_started=execution_started_hook,
        before_retrieve_html=before_retrieve_hook,
        before_return_html=before_return_hook,
    )

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nCrawling with all 8 hooks...")

        target_urls = ["https://httpbin.org/html"]
        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=30)

        print("\n✅ Success!")
        print(f" URL: {result.url}")
        print(f" Success: {result.success}")
        print(f" HTML: {len(result.html)} chars")
async def test_authentication_workflow():
    """Crawl a basic-auth endpoint using the two authentication hooks."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 2: Authentication Workflow (Docker Client)")
    print(divider)

    hooks = dict(
        on_page_context_created=auth_context_hook,
        before_goto=auth_headers_hook,
    )

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nTesting authentication...")

        target_urls = ["https://httpbin.org/basic-auth/user/passwd"]
        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=15)

        print("\n✅ Authentication completed")

        # Success is judged from the response body text
        if not result.success:
            print(f" ❌ Failed: {result.error_message}")
        elif '"authenticated"' in result.html and 'true' in result.html:
            print(" ✅ Basic auth successful!")
        else:
            print(" ⚠️ Auth status unclear")
async def test_performance_optimization():
    """Crawl one page with the resource-blocking and cleanup hooks."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 3: Performance Optimization (Docker Client)")
    print(divider)

    hooks = dict(
        on_page_context_created=performance_hook,
        before_retrieve_html=cleanup_hook,
    )

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nTesting performance hooks...")

        target_urls = ["https://httpbin.org/html"]
        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=10)

        print("\n✅ Optimization completed")
        html_size = len(result.html)
        print(f" HTML size: {html_size:,} chars")
        print(" Resources blocked, ads removed")
async def test_content_extraction():
    """Crawl one site with the dynamic-content and metadata hooks."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 4: Content Extraction (Docker Client)")
    print(divider)

    hooks = dict(
        after_goto=wait_dynamic_content_hook,
        before_retrieve_html=extract_metadata_hook,
    )

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nTesting extraction hooks...")

        target_urls = ["https://www.kidocode.com/"]
        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=20)

        print("\n✅ Extraction completed")
        print(f" URL: {result.url}")
        print(f" Success: {result.success}")
        print(f" Metadata: {result.metadata}")
async def test_multi_url_crawl():
    """Crawl three URLs with the same hooks and print a per-URL outcome.

    Each result line starts with a success or failure marker so the two
    outcomes can be told apart in the output.
    """
    print("\n" + "=" * 70)
    print("Test 5: Multi-URL Crawl (Docker Client)")
    print("=" * 70)

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nCrawling multiple URLs...")

        hooks = {
            "before_goto": url_specific_hook,
            "after_goto": track_progress_hook
        }

        results = await client.crawl(
            [
                "https://httpbin.org/html",
                "https://httpbin.org/json",
                "https://httpbin.org/xml"
            ],
            hooks=hooks,
            hooks_timeout=15
        )

        # The client returns a bare result instead of a list when exactly
        # one result comes back; normalise so the loop below always works.
        if not isinstance(results, list):
            results = [results]

        print("\n✅ Multi-URL crawl completed")
        print(f"\n Crawled {len(results)} URLs:")
        for i, result in enumerate(results, 1):
            # Distinct markers: both branches previously produced "".
            status = "✅" if result.success else "❌"
            print(f" {status} {i}. {result.url}")
async def test_reusable_hook_library():
    """Crawl one site using hooks taken from a small static-method library."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 6: Reusable Hook Library (Docker Client)")
    print(divider)

    class HookLibrary:
        """Namespace of reusable hooks exposed as static methods."""

        @staticmethod
        async def block_images(page, context, **kwargs):
            """Abort requests for common image file types."""
            image_pattern = "**/*.{png,jpg,jpeg,gif}"
            await context.route(image_pattern, lambda r: r.abort())
            print("[LIBRARY] Images blocked")
            return page

        @staticmethod
        async def block_analytics(page, context, **kwargs):
            """Abort requests matching the analytics URL patterns."""
            for analytics_pattern in ("**/analytics/*", "**/google-analytics.com/*"):
                await context.route(analytics_pattern, lambda r: r.abort())
            print("[LIBRARY] Analytics blocked")
            return page

        @staticmethod
        async def scroll_infinite(page, context, **kwargs):
            """Scroll down up to 5 times, stopping once the height stops growing."""
            height_script = "document.body.scrollHeight"
            for _ in range(5):
                before = await page.evaluate(height_script)
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
                await page.wait_for_timeout(1000)
                after = await page.evaluate(height_script)
                if after == before:
                    break
            print("[LIBRARY] Infinite scroll complete")
            return page

    hooks = dict(
        on_page_context_created=HookLibrary.block_images,
        before_retrieve_html=HookLibrary.scroll_infinite,
    )

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nUsing hook library...")

        target_urls = ["https://www.kidocode.com/"]
        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=20)

        print("\n✅ Library hooks completed")
        print(f" Success: {result.success}")
# ============================================================================
# Main
# ============================================================================
async def main():
    """Run each example in order; a failing example does not stop the rest."""
    import traceback

    divider = "=" * 70

    print("🔧 Crawl4AI Docker Client - Hooks Examples (Function-Based)")
    print("Using Python function objects with automatic conversion")
    print(divider)

    tests = [
        ("All Hooks Demo", test_all_hooks_comprehensive),
        ("Authentication", test_authentication_workflow),
        ("Performance", test_performance_optimization),
        ("Extraction", test_content_extraction),
        ("Multi-URL", test_multi_url_crawl),
        ("Hook Library", test_reusable_hook_library),
    ]
    total = len(tests)

    for position, (name, test_func) in enumerate(tests, 1):
        try:
            await test_func()
        except Exception as e:
            # Report the failure with its traceback, then keep going
            print(f"\n❌ Test {position}/{total}: {name} failed: {e}\n")
            traceback.print_exc()
        else:
            print(f"\n✅ Test {position}/{total}: {name} completed\n")

    print(divider)
    print("🎉 All Docker client hook examples completed!")
    print("\n💡 Key Benefits of Function-Based Hooks:")
    benefit_lines = (
        " • Write as regular Python functions",
        " • Full IDE support (autocomplete, types)",
        " • Automatic conversion to API format",
        " • Reusable across projects",
        " • Clean, readable code",
        " • Easy to test and debug",
    )
    for line in benefit_lines:
        print(line)
    print(divider)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -6,18 +6,6 @@
- [Option 1: Using Pre-built Docker Hub Images (Recommended)](#option-1-using-pre-built-docker-hub-images-recommended) - [Option 1: Using Pre-built Docker Hub Images (Recommended)](#option-1-using-pre-built-docker-hub-images-recommended)
- [Option 2: Using Docker Compose](#option-2-using-docker-compose) - [Option 2: Using Docker Compose](#option-2-using-docker-compose)
- [Option 3: Manual Local Build & Run](#option-3-manual-local-build--run) - [Option 3: Manual Local Build & Run](#option-3-manual-local-build--run)
- [Dockerfile Parameters](#dockerfile-parameters)
- [Using the API](#using-the-api)
- [Playground Interface](#playground-interface)
- [Python SDK](#python-sdk)
- [Understanding Request Schema](#understanding-request-schema)
- [REST API Examples](#rest-api-examples)
- [Additional API Endpoints](#additional-api-endpoints)
- [HTML Extraction Endpoint](#html-extraction-endpoint)
- [Screenshot Endpoint](#screenshot-endpoint)
- [PDF Export Endpoint](#pdf-export-endpoint)
- [JavaScript Execution Endpoint](#javascript-execution-endpoint)
- [Library Context Endpoint](#library-context-endpoint)
- [MCP (Model Context Protocol) Support](#mcp-model-context-protocol-support) - [MCP (Model Context Protocol) Support](#mcp-model-context-protocol-support)
- [What is MCP?](#what-is-mcp) - [What is MCP?](#what-is-mcp)
- [Connecting via MCP](#connecting-via-mcp) - [Connecting via MCP](#connecting-via-mcp)
@@ -25,9 +13,28 @@
- [Available MCP Tools](#available-mcp-tools) - [Available MCP Tools](#available-mcp-tools)
- [Testing MCP Connections](#testing-mcp-connections) - [Testing MCP Connections](#testing-mcp-connections)
- [MCP Schemas](#mcp-schemas) - [MCP Schemas](#mcp-schemas)
- [Additional API Endpoints](#additional-api-endpoints)
- [HTML Extraction Endpoint](#html-extraction-endpoint)
- [Screenshot Endpoint](#screenshot-endpoint)
- [PDF Export Endpoint](#pdf-export-endpoint)
- [JavaScript Execution Endpoint](#javascript-execution-endpoint)
- [User-Provided Hooks API](#user-provided-hooks-api)
- [Hook Information Endpoint](#hook-information-endpoint)
- [Available Hook Points](#available-hook-points)
- [Using Hooks in Requests](#using-hooks-in-requests)
- [Hook Examples with Real URLs](#hook-examples-with-real-urls)
- [Security Best Practices](#security-best-practices)
- [Hook Response Information](#hook-response-information)
- [Error Handling](#error-handling)
- [Hooks Utility: Function-Based Approach (Python)](#hooks-utility-function-based-approach-python)
- [Dockerfile Parameters](#dockerfile-parameters)
- [Using the API](#using-the-api)
- [Playground Interface](#playground-interface)
- [Python SDK](#python-sdk)
- [Understanding Request Schema](#understanding-request-schema)
- [REST API Examples](#rest-api-examples)
- [LLM Configuration Examples](#llm-configuration-examples)
- [Metrics & Monitoring](#metrics--monitoring) - [Metrics & Monitoring](#metrics--monitoring)
- [Deployment Scenarios](#deployment-scenarios)
- [Complete Examples](#complete-examples)
- [Server Configuration](#server-configuration) - [Server Configuration](#server-configuration)
- [Understanding config.yml](#understanding-configyml) - [Understanding config.yml](#understanding-configyml)
- [JWT Authentication](#jwt-authentication) - [JWT Authentication](#jwt-authentication)
@@ -832,6 +839,275 @@ else:
> 💡 **Remember**: Always test your hooks on safe, known websites first before using them on production sites. Never crawl sites that you don't have permission to access or that might be malicious. > 💡 **Remember**: Always test your hooks on safe, known websites first before using them on production sites. Never crawl sites that you don't have permission to access or that might be malicious.
### Hooks Utility: Function-Based Approach (Python)
For Python developers, Crawl4AI provides a more convenient way to work with hooks using the `hooks_to_string()` utility function and Docker client integration.
#### Why Use Function-Based Hooks?
**String-Based Approach (shown above)**:
```python
hooks_code = {
"on_page_context_created": """
async def hook(page, context, **kwargs):
await page.set_viewport_size({"width": 1920, "height": 1080})
return page
"""
}
```
**Function-Based Approach (recommended for Python)**:
```python
from crawl4ai import Crawl4aiDockerClient
async def my_hook(page, context, **kwargs):
await page.set_viewport_size({"width": 1920, "height": 1080})
return page
async with Crawl4aiDockerClient(base_url="http://localhost:11235") as client:
result = await client.crawl(
["https://example.com"],
hooks={"on_page_context_created": my_hook}
)
```
**Benefits**:
- ✅ Write hooks as regular Python functions
- ✅ Full IDE support (autocomplete, syntax highlighting, type checking)
- ✅ Easy to test and debug
- ✅ Reusable hook libraries
- ✅ Automatic conversion to API format
#### Using the Hooks Utility
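The `hooks_to_string()` utility converts Python function objects to the string format required by the API. Only each function's own source text is extracted, so write hooks to be self-contained: import what they need inside the function body and avoid relying on names defined elsewhere in your module: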
```python
from crawl4ai import hooks_to_string
# Define your hooks as functions
async def setup_hook(page, context, **kwargs):
await page.set_viewport_size({"width": 1920, "height": 1080})
await context.add_cookies([{
"name": "session",
"value": "token",
"domain": ".example.com"
}])
return page
async def scroll_hook(page, context, **kwargs):
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
return page
# Convert to string format
hooks_dict = {
"on_page_context_created": setup_hook,
"before_retrieve_html": scroll_hook
}
hooks_string = hooks_to_string(hooks_dict)
# Now use with REST API or Docker client
# hooks_string contains the string representations
```
#### Docker Client with Automatic Conversion
The Docker client automatically detects and converts function objects:
```python
from crawl4ai import Crawl4aiDockerClient
async def auth_hook(page, context, **kwargs):
"""Add authentication cookies"""
await context.add_cookies([{
"name": "auth_token",
"value": "your_token",
"domain": ".example.com"
}])
return page
async def performance_hook(page, context, **kwargs):
"""Block unnecessary resources"""
await context.route("**/*.{png,jpg,gif}", lambda r: r.abort())
await context.route("**/analytics/*", lambda r: r.abort())
return page
async with Crawl4aiDockerClient(base_url="http://localhost:11235") as client:
# Pass functions directly - automatic conversion!
result = await client.crawl(
["https://example.com"],
hooks={
"on_page_context_created": performance_hook,
"before_goto": auth_hook
},
hooks_timeout=30 # Optional timeout in seconds (1-120)
)
print(f"Success: {result.success}")
print(f"HTML: {len(result.html)} chars")
```
#### Creating Reusable Hook Libraries
Build collections of reusable hooks:
```python
# hooks_library.py
class CrawlHooks:
"""Reusable hook collection for common crawling tasks"""
@staticmethod
async def block_images(page, context, **kwargs):
"""Block all images to speed up crawling"""
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda r: r.abort())
return page
@staticmethod
async def block_analytics(page, context, **kwargs):
"""Block analytics and tracking scripts"""
tracking_domains = [
"**/google-analytics.com/*",
"**/googletagmanager.com/*",
"**/facebook.com/tr/*",
"**/doubleclick.net/*"
]
for domain in tracking_domains:
await context.route(domain, lambda r: r.abort())
return page
@staticmethod
async def scroll_infinite(page, context, **kwargs):
"""Handle infinite scroll to load more content"""
previous_height = 0
for i in range(5): # Max 5 scrolls
current_height = await page.evaluate("document.body.scrollHeight")
if current_height == previous_height:
break
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
await page.wait_for_timeout(1000)
previous_height = current_height
return page
@staticmethod
async def wait_for_dynamic_content(page, context, url, response, **kwargs):
"""Wait for dynamic content to load"""
await page.wait_for_timeout(2000)
try:
# Click "Load More" if present
load_more = await page.query_selector('[class*="load-more"]')
if load_more:
await load_more.click()
await page.wait_for_timeout(1000)
except:
pass
return page
# Use in your application
from hooks_library import CrawlHooks
from crawl4ai import Crawl4aiDockerClient
async def crawl_with_optimizations(url):
async with Crawl4aiDockerClient() as client:
result = await client.crawl(
[url],
hooks={
"on_page_context_created": CrawlHooks.block_images,
"before_retrieve_html": CrawlHooks.scroll_infinite
}
)
return result
```
#### Choosing the Right Approach
| Approach | Best For | IDE Support | Language |
|----------|----------|-------------|----------|
| **String-based** | Non-Python clients, REST APIs, other languages | ❌ None | Any |
| **Function-based** | Python applications, local development | ✅ Full | Python only |
| **Docker Client** | Python apps with automatic conversion | ✅ Full | Python only |
**Recommendation**:
- **Python applications**: Use Docker client with function objects (easiest)
- **Non-Python or REST API**: Use string-based hooks (most flexible)
- **Manual control**: Use `hooks_to_string()` utility (middle ground)
#### Complete Example with Function Hooks
```python
from crawl4ai import Crawl4aiDockerClient, BrowserConfig, CrawlerRunConfig, CacheMode
# Define hooks as regular Python functions
async def setup_environment(page, context, **kwargs):
"""Setup crawling environment"""
# Set viewport
await page.set_viewport_size({"width": 1920, "height": 1080})
# Block resources for speed
await context.route("**/*.{png,jpg,gif}", lambda r: r.abort())
# Add custom headers
await page.set_extra_http_headers({
"Accept-Language": "en-US",
"X-Custom-Header": "Crawl4AI"
})
print("[HOOK] Environment configured")
return page
async def extract_content(page, context, **kwargs):
    """Scroll to the bottom of the page, then log basic page metadata.

    Returns the same ``page`` that was passed in.
    """
    # Trigger lazy-loaded content and give it a second to arrive
    scroll_script = "window.scrollTo(0, document.body.scrollHeight)"
    await page.evaluate(scroll_script)
    await page.wait_for_timeout(1000)

    # Collect title plus link/image counts in the browser
    metadata_script = '''() => ({
title: document.title,
links: document.links.length,
images: document.images.length
})'''
    page_info = await page.evaluate(metadata_script)
    print(f"[HOOK] Page metadata: {page_info}")
    return page
async def main():
    """Crawl one page through the Docker client with both hooks and print a summary."""
    async with Crawl4aiDockerClient(base_url="http://localhost:11235", verbose=True) as client:
        # Crawl with hooks attached as plain Python functions
        result = await client.crawl(
            ["https://httpbin.org/html"],
            browser_config=BrowserConfig(headless=True),
            crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS),
            hooks={
                "on_page_context_created": setup_environment,
                "before_retrieve_html": extract_content,
            },
            hooks_timeout=30,
        )

        if not result.success:
            print(f"❌ Crawl failed: {result.error_message}")
            return

        print("✅ Crawl successful!")
        print(f" URL: {result.url}")
        print(f" HTML: {len(result.html)} chars")
        print(f" Markdown: {len(result.markdown)} chars")


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
```
#### Additional Resources
- **Comprehensive Examples**: See `/docs/examples/hooks_docker_client_example.py` for Python function-based examples
- **REST API Examples**: See `/docs/examples/hooks_rest_api_example.py` for string-based examples
- **Comparison Guide**: See `/docs/examples/README_HOOKS.md` for detailed comparison
- **Utility Documentation**: See `/docs/hooks-utility-guide.md` for complete guide
--- ---
## Dockerfile Parameters ## Dockerfile Parameters
@@ -892,10 +1168,12 @@ This is the easiest way to translate Python configuration to JSON requests when
Install the SDK: `pip install crawl4ai` Install the SDK: `pip install crawl4ai`
The Python SDK provides a convenient way to interact with the Docker API, including **automatic hook conversion** when using function objects.
```python ```python
import asyncio import asyncio
from crawl4ai.docker_client import Crawl4aiDockerClient from crawl4ai.docker_client import Crawl4aiDockerClient
from crawl4ai import BrowserConfig, CrawlerRunConfig, CacheMode # Assuming you have crawl4ai installed from crawl4ai import BrowserConfig, CrawlerRunConfig, CacheMode
async def main(): async def main():
# Point to the correct server port # Point to the correct server port
@@ -907,23 +1185,22 @@ async def main():
print("--- Running Non-Streaming Crawl ---") print("--- Running Non-Streaming Crawl ---")
results = await client.crawl( results = await client.crawl(
["https://httpbin.org/html"], ["https://httpbin.org/html"],
browser_config=BrowserConfig(headless=True), # Use library classes for config aid browser_config=BrowserConfig(headless=True),
crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS) crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
) )
if results: # client.crawl returns None on failure if results:
print(f"Non-streaming results success: {results.success}") print(f"Non-streaming results success: {results.success}")
if results.success: if results.success:
for result in results: # Iterate through the CrawlResultContainer for result in results:
print(f"URL: {result.url}, Success: {result.success}") print(f"URL: {result.url}, Success: {result.success}")
else: else:
print("Non-streaming crawl failed.") print("Non-streaming crawl failed.")
# Example Streaming crawl # Example Streaming crawl
print("\n--- Running Streaming Crawl ---") print("\n--- Running Streaming Crawl ---")
stream_config = CrawlerRunConfig(stream=True, cache_mode=CacheMode.BYPASS) stream_config = CrawlerRunConfig(stream=True, cache_mode=CacheMode.BYPASS)
try: try:
async for result in await client.crawl( # client.crawl returns an async generator for streaming async for result in await client.crawl(
["https://httpbin.org/html", "https://httpbin.org/links/5/0"], ["https://httpbin.org/html", "https://httpbin.org/links/5/0"],
browser_config=BrowserConfig(headless=True), browser_config=BrowserConfig(headless=True),
crawler_config=stream_config crawler_config=stream_config
@@ -932,17 +1209,56 @@ async def main():
except Exception as e: except Exception as e:
print(f"Streaming crawl failed: {e}") print(f"Streaming crawl failed: {e}")
# Example with hooks (Python function objects)
print("\n--- Crawl with Hooks ---")
async def my_hook(page, context, **kwargs):
"""Custom hook to optimize performance"""
await page.set_viewport_size({"width": 1920, "height": 1080})
await context.route("**/*.{png,jpg}", lambda r: r.abort())
print("[HOOK] Page optimized")
return page
result = await client.crawl(
["https://httpbin.org/html"],
browser_config=BrowserConfig(headless=True),
crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS),
hooks={"on_page_context_created": my_hook}, # Pass function directly!
hooks_timeout=30
)
print(f"Crawl with hooks success: {result.success}")
# Example Get schema # Example Get schema
print("\n--- Getting Schema ---") print("\n--- Getting Schema ---")
schema = await client.get_schema() schema = await client.get_schema()
print(f"Schema received: {bool(schema)}") # Print whether schema was received print(f"Schema received: {bool(schema)}")
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())
``` ```
*(SDK parameters like timeout, verify_ssl etc. remain the same)* #### SDK Parameters
The Docker client supports the following parameters:
**Client Initialization**:
- `base_url` (str): URL of the Docker server (default: `http://localhost:8000`; the examples in this guide pass `http://localhost:11235` explicitly)
- `timeout` (float): Request timeout in seconds (default: 30.0)
- `verify_ssl` (bool): Verify SSL certificates (default: True)
- `verbose` (bool): Enable verbose logging (default: True)
- `log_file` (Optional[str]): Path to log file (default: None)
**crawl() Method**:
- `urls` (List[str]): List of URLs to crawl
- `browser_config` (Optional[BrowserConfig]): Browser configuration
- `crawler_config` (Optional[CrawlerRunConfig]): Crawler configuration
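- `hooks` (Optional[Dict]): Mapping of hook point name (e.g. `on_page_context_created`) to a hook, given either as a Python function or as a source-code string - **function objects are converted to strings automatically**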
- `hooks_timeout` (int): Timeout for each hook execution in seconds (default: 30)
**Returns**:
- Single URL: `CrawlResult` object
- Multiple URLs: `List[CrawlResult]`
- Streaming: `AsyncGenerator[CrawlResult]`
### Second Approach: Direct API Calls ### Second Approach: Direct API Calls
@@ -1352,19 +1668,40 @@ We're here to help you succeed with Crawl4AI! Here's how to get support:
In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment: In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
- Building and running the Docker container - Building and running the Docker container
- Configuring the environment - Configuring the environment
- Using the interactive playground for testing - Using the interactive playground for testing
- Making API requests with proper typing - Making API requests with proper typing
- Using the Python SDK - Using the Python SDK with **automatic hook conversion**
- **Working with hooks** - both string-based (REST API) and function-based (Python SDK)
- Leveraging specialized endpoints for screenshots, PDFs, and JavaScript execution - Leveraging specialized endpoints for screenshots, PDFs, and JavaScript execution
- Connecting via the Model Context Protocol (MCP) - Connecting via the Model Context Protocol (MCP)
- Monitoring your deployment - Monitoring your deployment
The new playground interface at `http://localhost:11235/playground` makes it much easier to test configurations and generate the corresponding JSON for API requests. ### Key Features
For AI application developers, the MCP integration allows tools like Claude Code to directly access Crawl4AI's capabilities without complex API handling. **Hooks Support**: Crawl4AI offers two approaches for working with hooks:
- **String-based** (REST API): Works with any language, requires manual string formatting
- **Function-based** (Python SDK): Write hooks as regular Python functions with full IDE support and automatic conversion
Remember, the examples in the `examples` folder are your friends - they show real-world usage patterns that you can adapt for your needs. **Playground Interface**: The built-in playground at `http://localhost:11235/playground` makes it easy to test configurations and generate corresponding JSON for API requests.
**MCP Integration**: For AI application developers, the MCP integration allows tools like Claude Code to directly access Crawl4AI's capabilities without complex API handling.
### Next Steps
1. **Explore Examples**: Check out the comprehensive examples in:
- `/docs/examples/hooks_docker_client_example.py` - Python function-based hooks
- `/docs/examples/hooks_rest_api_example.py` - REST API string-based hooks
- `/docs/examples/README_HOOKS.md` - Comparison and guide
2. **Read Documentation**:
- `/docs/hooks-utility-guide.md` - Complete hooks utility guide
- API documentation for detailed configuration options
3. **Join the Community**:
- GitHub: Report issues and contribute
- Discord: Get help and share your experiences
- Documentation: Comprehensive guides and tutorials
Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀 Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀

View File

@@ -30,10 +30,10 @@ const { API_BASE, API_ORIGIN } = (() => {
if (origin) { if (origin) {
const normalized = cleanOrigin(origin); const normalized = cleanOrigin(origin);
return { API_BASE: `${normalized}/api`, API_ORIGIN: normalized }; return { API_BASE: `${normalized}/marketplace/api`, API_ORIGIN: normalized };
} }
return { API_BASE: '/api', API_ORIGIN: '' }; return { API_BASE: '/marketplace/api', API_ORIGIN: '' };
})(); })();
const resolveAssetUrl = (path) => { const resolveAssetUrl = (path) => {

View File

@@ -210,6 +210,6 @@
</div> </div>
</div> </div>
<script src="admin.js?v=1759334000"></script> <script src="admin.js?v=1759335000"></script>
</body> </body>
</html> </html>

View File

@@ -197,6 +197,41 @@
} }
/* Navigation Tabs */ /* Navigation Tabs */
/* Tab strip: lays the tab buttons out in one row with no gap between them.
   Its 2px bottom border is the line the tab buttons sit on. */
.tabs {
display: flex;
flex-direction: row;
gap: 0;
border-bottom: 2px solid var(--border-color);
margin-bottom: 0;
background: var(--bg-tertiary);
}
/* Individual tab button. The 3px bottom border is transparent until the tab
   is active; the -2px bottom margin pulls the button down over the strip's
   2px border so the two borders overlap. */
.tab-btn {
padding: 1rem 2rem;
background: transparent;
border: none;
border-bottom: 3px solid transparent;
color: var(--text-secondary);
cursor: pointer;
transition: all 0.2s;
font-family: inherit;
font-size: 0.95rem;
margin-bottom: -2px;
white-space: nowrap;
font-weight: 500;
}
/* Hover feedback: cyan text on a faint cyan tint. */
.tab-btn:hover {
color: var(--primary-cyan);
background: rgba(80, 255, 255, 0.05);
}
/* Selected tab: cyan text and underline on the secondary background. */
.tab-btn.active {
color: var(--primary-cyan);
border-bottom-color: var(--primary-cyan);
background: var(--bg-secondary);
}
.app-nav { .app-nav {
max-width: 1800px; max-width: 1800px;
margin: 2rem auto 0; margin: 2rem auto 0;
@@ -228,34 +263,167 @@
border-bottom-color: var(--primary-cyan); border-bottom-color: var(--primary-cyan);
} }
/* Content Sections */ /* Main Content Wrapper */
.app-content { .app-main {
max-width: 1800px; max-width: 1800px;
margin: 2rem auto; margin: 2rem auto;
padding: 0 2rem; padding: 0 2rem;
} }
/* Content Sections */
.app-content {
background: var(--bg-secondary);
border: 1px solid var(--border-color);
padding: 0;
}
.tab-content { .tab-content {
display: none; display: none;
padding: 2rem;
} }
.tab-content.active { .tab-content.active {
display: block; display: block;
} }
.docs-content { /* Overview Layout */
max-width: 1200px; .overview-columns {
padding: 2rem; display: grid;
grid-template-columns: 2fr 1fr;
gap: 2rem;
}
.overview-main h2, .overview-main h3 {
color: var(--primary-cyan);
margin-top: 2rem;
margin-bottom: 1rem;
}
.overview-main h2:first-child {
margin-top: 0;
}
.overview-main h2 {
font-size: 1.8rem;
border-bottom: 2px solid var(--border-color);
padding-bottom: 0.5rem;
}
.overview-main h3 {
font-size: 1.3rem;
}
.features-list {
list-style: none;
padding: 0;
}
.features-list li {
padding: 0.5rem 0;
padding-left: 1.5rem;
position: relative;
color: var(--text-secondary);
}
.features-list li:before {
content: "▸";
position: absolute;
left: 0;
color: var(--primary-cyan);
}
.use-cases p {
color: var(--text-secondary);
line-height: 1.6;
}
/* Sidebar */
.sidebar {
display: flex;
flex-direction: column;
gap: 1rem;
}
.sidebar-card {
background: var(--bg-secondary); background: var(--bg-secondary);
border: 1px solid var(--border-color); border: 1px solid var(--border-color);
padding: 1.5rem;
}
.sidebar-card h3 {
font-size: 1.1rem;
color: var(--primary-cyan);
margin: 0 0 1rem 0;
border-bottom: 1px solid var(--border-color);
padding-bottom: 0.5rem;
}
.stats-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
.stats-grid > div {
text-align: center;
}
.metadata {
margin: 0;
}
.metadata div {
display: flex;
justify-content: space-between;
padding: 0.75rem 0;
border-bottom: 1px solid var(--border-color);
}
.metadata dt {
color: var(--text-tertiary);
font-weight: normal;
}
.metadata dd {
color: var(--text-primary);
margin: 0;
font-weight: 600;
}
.sidebar-card p {
color: var(--text-secondary);
margin: 0;
}
/* Integration Content */
.integration-content {
max-width: 100%;
}
.integration-content h2 {
font-size: 1.8rem;
color: var(--primary-cyan);
margin: 0 0 2rem 0;
padding-bottom: 0.5rem;
border-bottom: 2px solid var(--border-color);
}
.integration-content h3 {
font-size: 1.3rem;
color: var(--text-primary);
margin: 2rem 0 1rem;
}
.docs-content {
max-width: 100%;
} }
.docs-content h2 { .docs-content h2 {
font-size: 1.8rem; font-size: 1.8rem;
color: var(--primary-cyan); color: var(--primary-cyan);
margin-bottom: 1rem; margin: 0 0 1.5rem 0;
padding-bottom: 0.5rem; padding-bottom: 0.5rem;
border-bottom: 1px solid var(--border-color); border-bottom: 2px solid var(--border-color);
} }
.docs-content h3 { .docs-content h3 {
@@ -290,6 +458,7 @@
border: 1px solid var(--border-color); border: 1px solid var(--border-color);
margin: 1rem 0; margin: 1rem 0;
overflow: hidden; overflow: hidden;
position: relative;
} }
.code-header { .code-header {
@@ -308,18 +477,23 @@
} }
.copy-btn { .copy-btn {
padding: 0.25rem 0.5rem; position: absolute;
background: transparent; top: 0.5rem;
right: 0.5rem;
padding: 0.4rem 0.8rem;
background: var(--bg-tertiary);
border: 1px solid var(--border-color); border: 1px solid var(--border-color);
color: var(--text-secondary); color: var(--text-secondary);
cursor: pointer; cursor: pointer;
font-size: 0.75rem; font-size: 0.75rem;
transition: all 0.2s; transition: all 0.2s;
z-index: 10;
} }
.copy-btn:hover { .copy-btn:hover {
border-color: var(--primary-cyan); border-color: var(--primary-cyan);
color: var(--primary-cyan); color: var(--primary-cyan);
background: var(--bg-secondary);
} }
.code-block pre { .code-block pre {
@@ -435,6 +609,10 @@
.app-stats { .app-stats {
justify-content: space-around; justify-content: space-around;
} }
.overview-columns {
grid-template-columns: 1fr;
}
} }
@media (max-width: 768px) { @media (max-width: 768px) {
@@ -446,6 +624,16 @@
flex-direction: column; flex-direction: column;
} }
.tabs {
overflow-x: auto;
-webkit-overflow-scrolling: touch;
}
.tab-btn {
padding: 0.75rem 1.5rem;
font-size: 0.875rem;
}
.app-nav { .app-nav {
overflow-x: auto; overflow-x: auto;
gap: 0; gap: 0;
@@ -459,4 +647,12 @@
.support-grid { .support-grid {
grid-template-columns: 1fr; grid-template-columns: 1fr;
} }
.tab-content {
padding: 1rem;
}
.app-main {
padding: 0 1rem;
}
} }

View File

@@ -0,0 +1,209 @@
<!DOCTYPE html>
<html lang="en" data-theme="dark">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>App Details - Crawl4AI Marketplace</title>
<link rel="stylesheet" href="marketplace.css">
<link rel="stylesheet" href="app-detail.css">
</head>
<body>
<div class="app-detail-container">
<!-- Header -->
<header class="marketplace-header">
<div class="header-content">
<div class="header-left">
<div class="logo-title">
<img src="../assets/images/logo.png" alt="Crawl4AI" class="header-logo">
<h1>
<span class="ascii-border">[</span>
Marketplace
<span class="ascii-border">]</span>
</h1>
</div>
</div>
<div class="header-nav">
<a href="index.html" class="back-btn">← Back to Marketplace</a>
</div>
</div>
</header>
<!-- App Hero Section -->
<section class="app-hero">
<div class="app-hero-content">
<div class="app-hero-image" id="app-image">
<!-- Dynamic image -->
</div>
<div class="app-hero-info">
<div class="app-badges">
<span class="app-badge" id="app-type">Open Source</span>
<span class="app-badge featured" id="app-featured" style="display:none">FEATURED</span>
<span class="app-badge sponsored" id="app-sponsored" style="display:none">SPONSORED</span>
</div>
<h1 id="app-name">App Name</h1>
<p id="app-description" class="app-tagline">App description goes here</p>
<div class="app-stats">
<div class="stat">
<span class="stat-value" id="app-rating">★★★★★</span>
<span class="stat-label">Rating</span>
</div>
<div class="stat">
<span class="stat-value" id="app-downloads">0</span>
<span class="stat-label">Downloads</span>
</div>
<div class="stat">
<span class="stat-value" id="app-category">Category</span>
<span class="stat-label">Category</span>
</div>
</div>
<!-- Action links open in a new tab. rel="noopener noreferrer" stops the opened
     page from reaching back to this one through window.opener. The "#" hrefs
     are placeholders (presumably filled in at runtime by app-detail.js; confirm). -->
<div class="app-actions">
    <a href="#" id="app-website" class="action-btn primary" target="_blank" rel="noopener noreferrer">Visit Website</a>
    <a href="#" id="app-github" class="action-btn" target="_blank" rel="noopener noreferrer">View GitHub</a>
    <a href="#" id="app-demo" class="action-btn" target="_blank" rel="noopener noreferrer" style="display:none">Live Demo</a>
</div>
</div>
</div>
</section>
<!-- App Details Section -->
<main class="app-main">
<div class="app-content">
<div class="tabs">
<button class="tab-btn active" data-tab="overview">Overview</button>
<button class="tab-btn" data-tab="integration">Integration</button>
<button class="tab-btn" data-tab="docs">Documentation</button>
<button class="tab-btn" data-tab="support">Support</button>
</div>
<section id="overview-tab" class="tab-content active">
<div class="overview-columns">
<div class="overview-main">
<h2>Overview</h2>
<div id="app-overview">Overview content goes here.</div>
<h3>Key Features</h3>
<ul id="app-features" class="features-list">
<li>Feature 1</li>
<li>Feature 2</li>
<li>Feature 3</li>
</ul>
<h3>Use Cases</h3>
<div id="app-use-cases" class="use-cases">
<p>Describe how this app can help your workflow.</p>
</div>
</div>
<aside class="sidebar">
<div class="sidebar-card">
<h3>Download Stats</h3>
<div class="stats-grid">
<div>
<span class="stat-value" id="sidebar-downloads">0</span>
<span class="stat-label">Downloads</span>
</div>
<div>
<span class="stat-value" id="sidebar-rating">0.0</span>
<span class="stat-label">Rating</span>
</div>
</div>
</div>
<div class="sidebar-card">
<h3>App Metadata</h3>
<dl class="metadata">
<div>
<dt>Category</dt>
<dd id="sidebar-category">-</dd>
</div>
<div>
<dt>Type</dt>
<dd id="sidebar-type">-</dd>
</div>
<div>
<dt>Status</dt>
<dd id="sidebar-status">Active</dd>
</div>
<div>
<dt>Pricing</dt>
<dd id="sidebar-pricing">-</dd>
</div>
</dl>
</div>
<div class="sidebar-card">
<h3>Contact</h3>
<p id="sidebar-contact">contact@example.com</p>
</div>
</aside>
</div>
</section>
<section id="integration-tab" class="tab-content">
<div class="integration-content">
<h2>Integration Guide</h2>
<h3>Installation</h3>
<div class="code-block">
<pre><code id="install-code"># Installation instructions will appear here</code></pre>
</div>
<h3>Basic Usage</h3>
<div class="code-block">
<pre><code id="usage-code"># Usage example will appear here</code></pre>
</div>
<h3>Complete Integration Example</h3>
<div class="code-block">
<button class="copy-btn" id="copy-integration">Copy</button>
<pre><code id="integration-code"># Complete integration guide will appear here</code></pre>
</div>
</div>
</section>
<section id="docs-tab" class="tab-content">
<div class="docs-content">
<h2>Documentation</h2>
<div id="app-docs" class="doc-sections">
<p>Documentation coming soon.</p>
</div>
</div>
</section>
<section id="support-tab" class="tab-content">
<div class="docs-content">
<h2>Support</h2>
<div class="support-grid">
<div class="support-card">
<h3>📧 Contact</h3>
<p id="app-contact">contact@example.com</p>
</div>
<div class="support-card">
<h3>🐛 Report Issues</h3>
<p>Found a bug? Report it on GitHub Issues.</p>
</div>
<div class="support-card">
<h3>💬 Community</h3>
<p>Join our Discord for help and discussions.</p>
</div>
</div>
</div>
</section>
</div>
</main>
<!-- Related Apps -->
<section class="related-apps">
<h2>Related Apps</h2>
<div id="related-apps-grid" class="related-grid">
<!-- Dynamic related apps -->
</div>
</section>
</div>
<script src="app-detail.js"></script>
</body>
</html>

View File

@@ -1,5 +1,15 @@
// App Detail Page JavaScript // App Detail Page JavaScript
const API_BASE = '/api'; const { API_BASE, API_ORIGIN } = (() => {
const { hostname, port, protocol } = window.location;
const isLocalHost = ['localhost', '127.0.0.1', '0.0.0.0'].includes(hostname);
if (isLocalHost && port && port !== '8100') {
const origin = `${protocol}//127.0.0.1:8100`;
return { API_BASE: `${origin}/marketplace/api`, API_ORIGIN: origin };
}
return { API_BASE: '/marketplace/api', API_ORIGIN: '' };
})();
class AppDetailPage { class AppDetailPage {
constructor() { constructor() {
@@ -70,7 +80,6 @@ class AppDetailPage {
document.getElementById('app-description').textContent = this.appData.description; document.getElementById('app-description').textContent = this.appData.description;
document.getElementById('app-type').textContent = this.appData.type || 'Open Source'; document.getElementById('app-type').textContent = this.appData.type || 'Open Source';
document.getElementById('app-category').textContent = this.appData.category; document.getElementById('app-category').textContent = this.appData.category;
document.getElementById('app-pricing').textContent = this.appData.pricing || 'Free';
// Badges // Badges
if (this.appData.featured) { if (this.appData.featured) {
@@ -105,6 +114,15 @@ class AppDetailPage {
// Contact // Contact
document.getElementById('app-contact').textContent = this.appData.contact_email || 'Not available'; document.getElementById('app-contact').textContent = this.appData.contact_email || 'Not available';
// Sidebar info
document.getElementById('sidebar-downloads').textContent = this.formatNumber(this.appData.downloads || 0);
document.getElementById('sidebar-rating').textContent = (this.appData.rating || 0).toFixed(1);
document.getElementById('sidebar-category').textContent = this.appData.category || '-';
document.getElementById('sidebar-type').textContent = this.appData.type || '-';
document.getElementById('sidebar-status').textContent = this.appData.status || 'Active';
document.getElementById('sidebar-pricing').textContent = this.appData.pricing || 'Free';
document.getElementById('sidebar-contact').textContent = this.appData.contact_email || 'contact@example.com';
// Integration guide // Integration guide
this.renderIntegrationGuide(); this.renderIntegrationGuide();
} }
@@ -112,24 +130,27 @@ class AppDetailPage {
renderIntegrationGuide() { renderIntegrationGuide() {
// Installation code // Installation code
const installCode = document.getElementById('install-code'); const installCode = document.getElementById('install-code');
if (this.appData.type === 'Open Source' && this.appData.github_url) { if (installCode) {
installCode.textContent = `# Clone from GitHub if (this.appData.type === 'Open Source' && this.appData.github_url) {
installCode.textContent = `# Clone from GitHub
git clone ${this.appData.github_url} git clone ${this.appData.github_url}
# Install dependencies # Install dependencies
pip install -r requirements.txt`; pip install -r requirements.txt`;
} else if (this.appData.name.toLowerCase().includes('api')) { } else if (this.appData.name.toLowerCase().includes('api')) {
installCode.textContent = `# Install via pip installCode.textContent = `# Install via pip
pip install ${this.appData.slug} pip install ${this.appData.slug}
# Or install from source # Or install from source
pip install git+${this.appData.github_url || 'https://github.com/example/repo'}`; pip install git+${this.appData.github_url || 'https://github.com/example/repo'}`;
}
} }
// Usage code - customize based on category // Usage code - customize based on category
const usageCode = document.getElementById('usage-code'); const usageCode = document.getElementById('usage-code');
if (this.appData.category === 'Browser Automation') { if (usageCode) {
usageCode.textContent = `from crawl4ai import AsyncWebCrawler if (this.appData.category === 'Browser Automation') {
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
from ${this.appData.slug.replace(/-/g, '_')} import ${this.appData.name.replace(/\s+/g, '')} from ${this.appData.slug.replace(/-/g, '_')} import ${this.appData.name.replace(/\s+/g, '')}
async def main(): async def main():
@@ -178,11 +199,13 @@ async with AsyncWebCrawler() as crawler:
extraction_strategy=strategy extraction_strategy=strategy
) )
print(result.extracted_content)`; print(result.extracted_content)`;
}
} }
// Integration example // Integration example
const integrationCode = document.getElementById('integration-code'); const integrationCode = document.getElementById('integration-code');
integrationCode.textContent = this.appData.integration_guide || if (integrationCode) {
integrationCode.textContent = this.appData.integration_guide ||
`# Complete ${this.appData.name} Integration Example `# Complete ${this.appData.name} Integration Example
from crawl4ai import AsyncWebCrawler from crawl4ai import AsyncWebCrawler
@@ -237,6 +260,7 @@ async def crawl_with_${this.appData.slug.replace(/-/g, '_')}():
if __name__ == "__main__": if __name__ == "__main__":
import asyncio import asyncio
asyncio.run(crawl_with_${this.appData.slug.replace(/-/g, '_')}())`; asyncio.run(crawl_with_${this.appData.slug.replace(/-/g, '_')}())`;
}
} }
formatNumber(num) { formatNumber(num) {
@@ -250,7 +274,7 @@ if __name__ == "__main__":
setupEventListeners() { setupEventListeners() {
// Tab switching // Tab switching
const tabs = document.querySelectorAll('.nav-tab'); const tabs = document.querySelectorAll('.tab-btn');
tabs.forEach(tab => { tabs.forEach(tab => {
tab.addEventListener('click', () => { tab.addEventListener('click', () => {
// Update active tab // Update active tab

View File

@@ -1,4 +1,4 @@
from fastapi import FastAPI, HTTPException, Query, Depends, Body, UploadFile, File, Form from fastapi import FastAPI, HTTPException, Query, Depends, Body, UploadFile, File, Form, APIRouter
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
@@ -16,6 +16,7 @@ from datetime import datetime, timedelta
from config import Config from config import Config
app = FastAPI(title="Crawl4AI Marketplace API") app = FastAPI(title="Crawl4AI Marketplace API")
router = APIRouter(prefix="/marketplace/api")
# Security setup # Security setup
security = HTTPBearer() security = HTTPBearer()
@@ -84,7 +85,7 @@ def to_int(value, default=0):
# ============= PUBLIC ENDPOINTS ============= # ============= PUBLIC ENDPOINTS =============
@app.get("/api/apps") @router.get("/apps")
async def get_apps( async def get_apps(
category: Optional[str] = None, category: Optional[str] = None,
type: Optional[str] = None, type: Optional[str] = None,
@@ -114,7 +115,7 @@ async def get_apps(
return json_response(apps) return json_response(apps)
@app.get("/api/apps/{slug}") @router.get("/apps/{slug}")
async def get_app(slug: str): async def get_app(slug: str):
"""Get single app by slug""" """Get single app by slug"""
apps = db.get_all('apps', where=f"slug = '{slug}'", limit=1) apps = db.get_all('apps', where=f"slug = '{slug}'", limit=1)
@@ -127,7 +128,7 @@ async def get_app(slug: str):
return json_response(app) return json_response(app)
@app.get("/api/articles") @router.get("/articles")
async def get_articles( async def get_articles(
category: Optional[str] = None, category: Optional[str] = None,
limit: int = Query(default=20, le=10000), limit: int = Query(default=20, le=10000),
@@ -146,7 +147,7 @@ async def get_articles(
return json_response(articles) return json_response(articles)
@app.get("/api/articles/{slug}") @router.get("/articles/{slug}")
async def get_article(slug: str): async def get_article(slug: str):
"""Get single article by slug""" """Get single article by slug"""
articles = db.get_all('articles', where=f"slug = '{slug}'", limit=1) articles = db.get_all('articles', where=f"slug = '{slug}'", limit=1)
@@ -161,7 +162,7 @@ async def get_article(slug: str):
return json_response(article) return json_response(article)
@app.get("/api/categories") @router.get("/categories")
async def get_categories(): async def get_categories():
"""Get all categories ordered by index""" """Get all categories ordered by index"""
categories = db.get_all('categories', limit=50) categories = db.get_all('categories', limit=50)
@@ -170,7 +171,7 @@ async def get_categories():
categories.sort(key=lambda x: x.get('order_index', 0)) categories.sort(key=lambda x: x.get('order_index', 0))
return json_response(categories, cache_time=7200) return json_response(categories, cache_time=7200)
@app.get("/api/sponsors") @router.get("/sponsors")
async def get_sponsors(active: Optional[bool] = True): async def get_sponsors(active: Optional[bool] = True):
"""Get sponsors, default active only""" """Get sponsors, default active only"""
where = f"active = {1 if active else 0}" if active is not None else None where = f"active = {1 if active else 0}" if active is not None else None
@@ -185,7 +186,7 @@ async def get_sponsors(active: Optional[bool] = True):
return json_response(sponsors) return json_response(sponsors)
@app.get("/api/search") @router.get("/search")
async def search(q: str = Query(min_length=2)): async def search(q: str = Query(min_length=2)):
"""Search across apps and articles""" """Search across apps and articles"""
if len(q) < 2: if len(q) < 2:
@@ -206,7 +207,7 @@ async def search(q: str = Query(min_length=2)):
return json_response(results, cache_time=1800) return json_response(results, cache_time=1800)
@app.get("/api/stats") @router.get("/stats")
async def get_stats(): async def get_stats():
"""Get marketplace statistics""" """Get marketplace statistics"""
stats = { stats = {
@@ -227,7 +228,7 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
return token return token
@app.post("/api/admin/upload-image", dependencies=[Depends(verify_token)]) @router.post("/admin/upload-image", dependencies=[Depends(verify_token)])
async def upload_image(file: UploadFile = File(...), folder: str = Form("sponsors")): async def upload_image(file: UploadFile = File(...), folder: str = Form("sponsors")):
"""Upload image files for admin assets""" """Upload image files for admin assets"""
folder = (folder or "").strip().lower() folder = (folder or "").strip().lower()
@@ -251,7 +252,7 @@ async def upload_image(file: UploadFile = File(...), folder: str = Form("sponsor
return {"url": f"/uploads/{folder}/{filename}"} return {"url": f"/uploads/{folder}/{filename}"}
@app.post("/api/admin/login") @router.post("/admin/login")
async def admin_login(password: str = Body(..., embed=True)): async def admin_login(password: str = Body(..., embed=True)):
"""Admin login with password""" """Admin login with password"""
provided_hash = hashlib.sha256(password.encode()).hexdigest() provided_hash = hashlib.sha256(password.encode()).hexdigest()
@@ -272,7 +273,7 @@ async def admin_login(password: str = Body(..., embed=True)):
# ============= ADMIN ENDPOINTS ============= # ============= ADMIN ENDPOINTS =============
@app.get("/api/admin/stats", dependencies=[Depends(verify_token)]) @router.get("/admin/stats", dependencies=[Depends(verify_token)])
async def get_admin_stats(): async def get_admin_stats():
"""Get detailed admin statistics""" """Get detailed admin statistics"""
stats = { stats = {
@@ -292,7 +293,7 @@ async def get_admin_stats():
return stats return stats
# Apps CRUD # Apps CRUD
@app.post("/api/admin/apps", dependencies=[Depends(verify_token)]) @router.post("/admin/apps", dependencies=[Depends(verify_token)])
async def create_app(app_data: Dict[str, Any]): async def create_app(app_data: Dict[str, Any]):
"""Create new app""" """Create new app"""
try: try:
@@ -311,7 +312,7 @@ async def create_app(app_data: Dict[str, Any]):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.put("/api/admin/apps/{app_id}", dependencies=[Depends(verify_token)]) @router.put("/admin/apps/{app_id}", dependencies=[Depends(verify_token)])
async def update_app(app_id: int, app_data: Dict[str, Any]): async def update_app(app_id: int, app_data: Dict[str, Any]):
"""Update app""" """Update app"""
try: try:
@@ -329,7 +330,7 @@ async def update_app(app_id: int, app_data: Dict[str, Any]):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.delete("/api/admin/apps/{app_id}", dependencies=[Depends(verify_token)]) @router.delete("/admin/apps/{app_id}", dependencies=[Depends(verify_token)])
async def delete_app(app_id: int): async def delete_app(app_id: int):
"""Delete app""" """Delete app"""
cursor = db.conn.cursor() cursor = db.conn.cursor()
@@ -338,7 +339,7 @@ async def delete_app(app_id: int):
return {"message": "App deleted"} return {"message": "App deleted"}
# Articles CRUD # Articles CRUD
@app.post("/api/admin/articles", dependencies=[Depends(verify_token)]) @router.post("/admin/articles", dependencies=[Depends(verify_token)])
async def create_article(article_data: Dict[str, Any]): async def create_article(article_data: Dict[str, Any]):
"""Create new article""" """Create new article"""
try: try:
@@ -356,7 +357,7 @@ async def create_article(article_data: Dict[str, Any]):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.put("/api/admin/articles/{article_id}", dependencies=[Depends(verify_token)]) @router.put("/admin/articles/{article_id}", dependencies=[Depends(verify_token)])
async def update_article(article_id: int, article_data: Dict[str, Any]): async def update_article(article_id: int, article_data: Dict[str, Any]):
"""Update article""" """Update article"""
try: try:
@@ -373,7 +374,7 @@ async def update_article(article_id: int, article_data: Dict[str, Any]):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.delete("/api/admin/articles/{article_id}", dependencies=[Depends(verify_token)]) @router.delete("/admin/articles/{article_id}", dependencies=[Depends(verify_token)])
async def delete_article(article_id: int): async def delete_article(article_id: int):
"""Delete article""" """Delete article"""
cursor = db.conn.cursor() cursor = db.conn.cursor()
@@ -382,7 +383,7 @@ async def delete_article(article_id: int):
return {"message": "Article deleted"} return {"message": "Article deleted"}
# Categories CRUD # Categories CRUD
@app.post("/api/admin/categories", dependencies=[Depends(verify_token)]) @router.post("/admin/categories", dependencies=[Depends(verify_token)])
async def create_category(category_data: Dict[str, Any]): async def create_category(category_data: Dict[str, Any]):
"""Create new category""" """Create new category"""
try: try:
@@ -399,7 +400,7 @@ async def create_category(category_data: Dict[str, Any]):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.put("/api/admin/categories/{cat_id}", dependencies=[Depends(verify_token)]) @router.put("/admin/categories/{cat_id}", dependencies=[Depends(verify_token)])
async def update_category(cat_id: int, category_data: Dict[str, Any]): async def update_category(cat_id: int, category_data: Dict[str, Any]):
"""Update category""" """Update category"""
try: try:
@@ -417,7 +418,7 @@ async def update_category(cat_id: int, category_data: Dict[str, Any]):
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.delete("/api/admin/categories/{cat_id}", dependencies=[Depends(verify_token)]) @router.delete("/admin/categories/{cat_id}", dependencies=[Depends(verify_token)])
async def delete_category(cat_id: int): async def delete_category(cat_id: int):
"""Delete category""" """Delete category"""
try: try:
@@ -429,7 +430,7 @@ async def delete_category(cat_id: int):
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
# Sponsors CRUD # Sponsors CRUD
@app.post("/api/admin/sponsors", dependencies=[Depends(verify_token)]) @router.post("/admin/sponsors", dependencies=[Depends(verify_token)])
async def create_sponsor(sponsor_data: Dict[str, Any]): async def create_sponsor(sponsor_data: Dict[str, Any]):
"""Create new sponsor""" """Create new sponsor"""
try: try:
@@ -443,7 +444,7 @@ async def create_sponsor(sponsor_data: Dict[str, Any]):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.put("/api/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)]) @router.put("/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)])
async def update_sponsor(sponsor_id: int, sponsor_data: Dict[str, Any]): async def update_sponsor(sponsor_id: int, sponsor_data: Dict[str, Any]):
"""Update sponsor""" """Update sponsor"""
try: try:
@@ -457,7 +458,7 @@ async def update_sponsor(sponsor_id: int, sponsor_data: Dict[str, Any]):
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
@app.delete("/api/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)]) @router.delete("/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)])
async def delete_sponsor(sponsor_id: int): async def delete_sponsor(sponsor_id: int):
"""Delete sponsor""" """Delete sponsor"""
try: try:
@@ -468,6 +469,9 @@ async def delete_sponsor(sponsor_id: int):
except Exception as e: except Exception as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))
app.include_router(router)
@app.get("/") @app.get("/")
async def root(): async def root():
"""API info""" """API info"""
@@ -475,12 +479,12 @@ async def root():
"name": "Crawl4AI Marketplace API", "name": "Crawl4AI Marketplace API",
"version": "1.0.0", "version": "1.0.0",
"endpoints": [ "endpoints": [
"/api/apps", "/marketplace/api/apps",
"/api/articles", "/marketplace/api/articles",
"/api/categories", "/marketplace/api/categories",
"/api/sponsors", "/marketplace/api/sponsors",
"/api/search?q=query", "/marketplace/api/search?q=query",
"/api/stats" "/marketplace/api/stats"
] ]
} }

View File

@@ -1,5 +1,15 @@
// App Detail Page JavaScript // App Detail Page JavaScript
const API_BASE = '/api'; const { API_BASE, API_ORIGIN } = (() => {
const { hostname, port, protocol } = window.location;
const isLocalHost = ['localhost', '127.0.0.1', '0.0.0.0'].includes(hostname);
if (isLocalHost && port && port !== '8100') {
const origin = `${protocol}//127.0.0.1:8100`;
return { API_BASE: `${origin}/marketplace/api`, API_ORIGIN: origin };
}
return { API_BASE: '/marketplace/api', API_ORIGIN: '' };
})();
class AppDetailPage { class AppDetailPage {
constructor() { constructor() {

View File

@@ -1,5 +1,5 @@
// Marketplace JS - Magazine Layout // Marketplace JS - Magazine Layout
const API_BASE = '/api'; const API_BASE = '/marketplace/api';
const CACHE_TTL = 3600000; // 1 hour in ms const CACHE_TTL = 3600000; // 1 hour in ms
class MarketplaceCache { class MarketplaceCache {

View File

@@ -231,7 +231,9 @@ a:hover {
.hero-image { .hero-image {
width: 100%; width: 100%;
height: 240px; height: 200px;
min-height: 200px;
max-height: 200px;
background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05)); background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05));
background-size: cover; background-size: cover;
background-position: center; background-position: center;
@@ -243,6 +245,14 @@ a:hover {
flex-shrink: 0; flex-shrink: 0;
position: relative; position: relative;
filter: brightness(1.1) contrast(1.1); filter: brightness(1.1) contrast(1.1);
overflow: hidden;
}
.hero-image img {
width: 100%;
height: 100%;
object-fit: cover;
object-position: center;
} }
.hero-image::after { .hero-image::after {
@@ -257,6 +267,10 @@ a:hover {
.hero-content { .hero-content {
padding: 1.5rem; padding: 1.5rem;
flex: 1;
display: flex;
flex-direction: column;
justify-content: space-between;
} }
.hero-badge { .hero-badge {
@@ -301,9 +315,9 @@ a:hover {
/* Secondary Featured */ /* Secondary Featured */
.secondary-featured { .secondary-featured {
grid-column: 1 / -1; grid-column: 1 / -1;
height: 380px; min-height: 380px;
display: flex; display: flex;
align-items: stretch; align-items: flex-start;
} }
.featured-secondary-cards { .featured-secondary-cards {
@@ -311,7 +325,7 @@ a:hover {
display: flex; display: flex;
flex-direction: column; flex-direction: column;
gap: 0.75rem; gap: 0.75rem;
justify-content: space-between; align-items: stretch;
} }
.secondary-card { .secondary-card {
@@ -321,8 +335,10 @@ a:hover {
transition: all 0.3s ease; transition: all 0.3s ease;
display: flex; display: flex;
overflow: hidden; overflow: hidden;
height: calc((380px - 1.5rem) / 3); height: 118px;
flex: 1; min-height: 118px;
max-height: 118px;
flex-shrink: 0;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3); box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3);
} }
@@ -875,10 +891,13 @@ a:hover {
.secondary-featured { .secondary-featured {
grid-column: 3 / 5; grid-column: 3 / 5;
grid-row: 1; grid-row: 1;
min-height: auto;
} }
.featured-secondary-cards { .featured-secondary-cards {
display: grid;
grid-template-columns: repeat(2, 1fr); grid-template-columns: repeat(2, 1fr);
flex-direction: unset;
} }
.main-content { .main-content {
@@ -906,10 +925,13 @@ a:hover {
.secondary-featured { .secondary-featured {
grid-column: 3 / 6; grid-column: 3 / 6;
min-height: auto;
} }
.featured-secondary-cards { .featured-secondary-cards {
display: grid;
grid-template-columns: repeat(3, 1fr); grid-template-columns: repeat(3, 1fr);
flex-direction: unset;
} }
.sponsored-section { .sponsored-section {

View File

@@ -3,9 +3,9 @@ const { API_BASE, API_ORIGIN } = (() => {
const { hostname, port } = window.location; const { hostname, port } = window.location;
if ((hostname === 'localhost' || hostname === '127.0.0.1') && port === '8000') { if ((hostname === 'localhost' || hostname === '127.0.0.1') && port === '8000') {
const origin = 'http://127.0.0.1:8100'; const origin = 'http://127.0.0.1:8100';
return { API_BASE: `${origin}/api`, API_ORIGIN: origin }; return { API_BASE: `${origin}/marketplace/api`, API_ORIGIN: origin };
} }
return { API_BASE: '/api', API_ORIGIN: '' }; return { API_BASE: '/marketplace/api', API_ORIGIN: '' };
})(); })();
const resolveAssetUrl = (path) => { const resolveAssetUrl = (path) => {

View File

@@ -0,0 +1,193 @@
"""
Test script demonstrating the hooks_to_string utility and Docker client integration.
"""
import asyncio
from crawl4ai import Crawl4aiDockerClient, hooks_to_string
# Define hook functions as regular Python functions
async def auth_hook(page, context, **kwargs):
    """Add a single test cookie to the browser context.

    Args:
        page: Page object handed to the hook; returned untouched.
        context: Object exposing an awaitable ``add_cookies(list)``.
        **kwargs: Extra hook arguments; ignored.

    Returns:
        The same ``page`` that was passed in.
    """
    test_cookie = {
        'name': 'test_cookie',
        'value': 'test_value',
        'domain': '.httpbin.org',
        'path': '/',
    }
    await context.add_cookies([test_cookie])
    return page
async def scroll_hook(page, context, **kwargs):
    """Scroll the page to the bottom, then pause, so lazy content can load.

    Args:
        page: Object exposing awaitable ``evaluate`` and ``wait_for_timeout``.
        context: Unused.
        **kwargs: Extra hook arguments; ignored.

    Returns:
        The same ``page`` that was passed in.
    """
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    settle_delay = 1000  # passed straight to page.wait_for_timeout
    await page.evaluate(scroll_to_bottom)
    await page.wait_for_timeout(settle_delay)
    return page
async def viewport_hook(page, context, **kwargs):
    """Resize the page viewport to 1920x1080.

    Args:
        page: Object exposing an awaitable ``set_viewport_size(dict)``.
        context: Unused.
        **kwargs: Extra hook arguments; ignored.

    Returns:
        The same ``page`` that was passed in.
    """
    full_hd = {"width": 1920, "height": 1080}
    await page.set_viewport_size(full_hd)
    return page
async def test_hooks_utility():
    """Run ``hooks_to_string`` on two hook functions and print a summary.

    Prints the number of converted hooks, their names, and the first 200
    characters of the ``on_page_context_created`` entry.

    Returns:
        Whatever ``hooks_to_string`` returned for the two hooks.
    """
    banner = "=" * 60
    divider = "-" * 60

    print(banner)
    print("Testing hooks_to_string utility")
    print(banner)

    # Hooks are given as function objects keyed by hook-point name.
    converted = hooks_to_string({
        "on_page_context_created": auth_hook,
        "before_retrieve_html": scroll_hook,
    })

    print("\n✓ Successfully converted function objects to strings")
    print(f"\n✓ Converted {len(converted)} hooks:")
    for name in converted:
        print(f" - {name}")

    preview = converted["on_page_context_created"][:200]
    print("\n✓ Preview of converted hook:")
    print(divider)
    print(preview + "...")
    print(divider)
    return converted
async def test_docker_client_with_functions():
    """Crawl via the Docker client, passing hook functions directly.

    This performs a real request: it needs a Crawl4AI Docker server
    listening on http://localhost:11234.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Docker Client with Function Objects")
    print(banner)

    # Function objects are handed to crawl() as-is; no manual conversion here.
    hook_functions = {
        "on_page_context_created": auth_hook,
        "before_retrieve_html": scroll_hook,
    }

    docker_client = Crawl4aiDockerClient(base_url="http://localhost:11234", verbose=True)
    async with docker_client as client:
        result = await client.crawl(
            ["https://httpbin.org/html"],
            hooks=hook_functions,
            hooks_timeout=30,
        )
        print(f"\n✓ Crawl successful: {result.success}")
        print(f"✓ URL: {result.url}")

    print("\n✓ Docker client accepts function objects directly")
    print("✓ Automatic conversion happens internally")
    print("✓ No manual string formatting needed!")
async def test_docker_client_with_strings():
    """Crawl via the Docker client using hooks converted up front.

    The hook functions are first run through ``hooks_to_string`` and the
    result is passed to ``client.crawl``. This performs a real request: it
    needs a Crawl4AI Docker server listening on http://localhost:11234.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Docker Client with String Hooks")
    print(banner)

    # Convert explicitly before the client is involved.
    hooks_string = hooks_to_string({
        "on_page_context_created": viewport_hook,
        "before_retrieve_html": scroll_hook,
    })

    docker_client = Crawl4aiDockerClient(base_url="http://localhost:11234", verbose=True)
    async with docker_client as client:
        result = await client.crawl(
            ["https://httpbin.org/html"],
            hooks=hooks_string,
            hooks_timeout=30,
        )
        print(f"\n✓ Crawl successful: {result.success}")

    print("\n✓ Docker client also accepts pre-converted strings")
    print("✓ Backward compatible with existing code")
async def show_usage_patterns():
    """Print three example ways of supplying hooks to ``client.crawl``.

    For each pattern a heading, a 60-dash divider and a code sample are
    printed. The samples are display text only; nothing in them is executed.
    Their bodies are indented so the printed examples read as valid Python.
    """
    print("\n" + "=" * 60)
    print("Usage Patterns")
    print("=" * 60)

    # (heading, sample) pairs, printed in order. Sample text starts at
    # column 0 inside the literals so only the sample's own indentation
    # ends up in the output.
    patterns = (
        (
            "\n1. Direct function usage (simplest):",
            """
async def my_hook(page, context, **kwargs):
    await page.set_viewport_size({"width": 1920, "height": 1080})
    return page
result = await client.crawl(
    ["https://example.com"],
    hooks={"on_page_context_created": my_hook}
)
""",
        ),
        (
            "\n2. Convert then use:",
            """
hooks_dict = {"on_page_context_created": my_hook}
hooks_string = hooks_to_string(hooks_dict)
result = await client.crawl(
    ["https://example.com"],
    hooks=hooks_string
)
""",
        ),
        (
            "\n3. Manual string (backward compatible):",
            """
hooks_string = {
    "on_page_context_created": '''
async def hook(page, context, **kwargs):
    await page.set_viewport_size({"width": 1920, "height": 1080})
    return page
'''
}
result = await client.crawl(
    ["https://example.com"],
    hooks=hooks_string
)
""",
        ),
    )

    for heading, sample in patterns:
        print(heading)
        print("-" * 60)
        print(sample)
async def main():
    """Print the suite banner and run the currently enabled check.

    Only ``test_docker_client_with_strings`` is awaited here.
    ``test_hooks_utility``, ``test_docker_client_with_functions`` and
    ``show_usage_patterns`` are defined in this module but are not invoked;
    add ``await`` calls below to include them in a run.
    """
    suite_banner = "\n🚀 Crawl4AI Hooks Utility Test Suite\n"
    print(suite_banner)

    # Docker client round-trip using pre-converted string hooks.
    await test_docker_client_with_strings()
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())