From 5a4f21fad987ef074fdabfcb962d8dd273cabd69 Mon Sep 17 00:00:00 2001 From: unclecode Date: Thu, 9 Oct 2025 22:26:15 +0800 Subject: [PATCH 1/4] fix(marketplace): isolate api under marketplace prefix --- docs/md_v2/marketplace/admin/admin.js | 4 +- docs/md_v2/marketplace/admin/index.html | 2 +- docs/md_v2/marketplace/app-detail.js | 2 +- docs/md_v2/marketplace/backend/server.py | 64 ++++++++++--------- docs/md_v2/marketplace/frontend/app-detail.js | 2 +- .../md_v2/marketplace/frontend/marketplace.js | 2 +- docs/md_v2/marketplace/marketplace.js | 4 +- 7 files changed, 42 insertions(+), 38 deletions(-) diff --git a/docs/md_v2/marketplace/admin/admin.js b/docs/md_v2/marketplace/admin/admin.js index 7d31a826..d43dd822 100644 --- a/docs/md_v2/marketplace/admin/admin.js +++ b/docs/md_v2/marketplace/admin/admin.js @@ -30,10 +30,10 @@ const { API_BASE, API_ORIGIN } = (() => { if (origin) { const normalized = cleanOrigin(origin); - return { API_BASE: `${normalized}/api`, API_ORIGIN: normalized }; + return { API_BASE: `${normalized}/marketplace/api`, API_ORIGIN: normalized }; } - return { API_BASE: '/api', API_ORIGIN: '' }; + return { API_BASE: '/marketplace/api', API_ORIGIN: '' }; })(); const resolveAssetUrl = (path) => { diff --git a/docs/md_v2/marketplace/admin/index.html b/docs/md_v2/marketplace/admin/index.html index a9b649f2..0d302384 100644 --- a/docs/md_v2/marketplace/admin/index.html +++ b/docs/md_v2/marketplace/admin/index.html @@ -210,6 +210,6 @@ - + \ No newline at end of file diff --git a/docs/md_v2/marketplace/app-detail.js b/docs/md_v2/marketplace/app-detail.js index d1b3b559..622f9c88 100644 --- a/docs/md_v2/marketplace/app-detail.js +++ b/docs/md_v2/marketplace/app-detail.js @@ -1,5 +1,5 @@ // App Detail Page JavaScript -const API_BASE = '/api'; +const API_BASE = '/marketplace/api'; class AppDetailPage { constructor() { diff --git a/docs/md_v2/marketplace/backend/server.py b/docs/md_v2/marketplace/backend/server.py index 0f177386..a414ad80 100644 --- 
a/docs/md_v2/marketplace/backend/server.py +++ b/docs/md_v2/marketplace/backend/server.py @@ -1,4 +1,4 @@ -from fastapi import FastAPI, HTTPException, Query, Depends, Body, UploadFile, File, Form +from fastapi import FastAPI, HTTPException, Query, Depends, Body, UploadFile, File, Form, APIRouter from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from fastapi.staticfiles import StaticFiles @@ -16,6 +16,7 @@ from datetime import datetime, timedelta from config import Config app = FastAPI(title="Crawl4AI Marketplace API") +router = APIRouter(prefix="/marketplace/api") # Security setup security = HTTPBearer() @@ -84,7 +85,7 @@ def to_int(value, default=0): # ============= PUBLIC ENDPOINTS ============= -@app.get("/api/apps") +@router.get("/apps") async def get_apps( category: Optional[str] = None, type: Optional[str] = None, @@ -114,7 +115,7 @@ async def get_apps( return json_response(apps) -@app.get("/api/apps/{slug}") +@router.get("/apps/{slug}") async def get_app(slug: str): """Get single app by slug""" apps = db.get_all('apps', where=f"slug = '{slug}'", limit=1) @@ -127,7 +128,7 @@ async def get_app(slug: str): return json_response(app) -@app.get("/api/articles") +@router.get("/articles") async def get_articles( category: Optional[str] = None, limit: int = Query(default=20, le=10000), @@ -146,7 +147,7 @@ async def get_articles( return json_response(articles) -@app.get("/api/articles/{slug}") +@router.get("/articles/{slug}") async def get_article(slug: str): """Get single article by slug""" articles = db.get_all('articles', where=f"slug = '{slug}'", limit=1) @@ -161,7 +162,7 @@ async def get_article(slug: str): return json_response(article) -@app.get("/api/categories") +@router.get("/categories") async def get_categories(): """Get all categories ordered by index""" categories = db.get_all('categories', limit=50) @@ -170,7 +171,7 @@ async def get_categories(): categories.sort(key=lambda x: x.get('order_index', 0)) return 
json_response(categories, cache_time=7200) -@app.get("/api/sponsors") +@router.get("/sponsors") async def get_sponsors(active: Optional[bool] = True): """Get sponsors, default active only""" where = f"active = {1 if active else 0}" if active is not None else None @@ -185,7 +186,7 @@ async def get_sponsors(active: Optional[bool] = True): return json_response(sponsors) -@app.get("/api/search") +@router.get("/search") async def search(q: str = Query(min_length=2)): """Search across apps and articles""" if len(q) < 2: @@ -206,7 +207,7 @@ async def search(q: str = Query(min_length=2)): return json_response(results, cache_time=1800) -@app.get("/api/stats") +@router.get("/stats") async def get_stats(): """Get marketplace statistics""" stats = { @@ -227,7 +228,7 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)): return token -@app.post("/api/admin/upload-image", dependencies=[Depends(verify_token)]) +@router.post("/admin/upload-image", dependencies=[Depends(verify_token)]) async def upload_image(file: UploadFile = File(...), folder: str = Form("sponsors")): """Upload image files for admin assets""" folder = (folder or "").strip().lower() @@ -251,7 +252,7 @@ async def upload_image(file: UploadFile = File(...), folder: str = Form("sponsor return {"url": f"/uploads/{folder}/{filename}"} -@app.post("/api/admin/login") +@router.post("/admin/login") async def admin_login(password: str = Body(..., embed=True)): """Admin login with password""" provided_hash = hashlib.sha256(password.encode()).hexdigest() @@ -272,7 +273,7 @@ async def admin_login(password: str = Body(..., embed=True)): # ============= ADMIN ENDPOINTS ============= -@app.get("/api/admin/stats", dependencies=[Depends(verify_token)]) +@router.get("/admin/stats", dependencies=[Depends(verify_token)]) async def get_admin_stats(): """Get detailed admin statistics""" stats = { @@ -292,7 +293,7 @@ async def get_admin_stats(): return stats # Apps CRUD -@app.post("/api/admin/apps", 
dependencies=[Depends(verify_token)]) +@router.post("/admin/apps", dependencies=[Depends(verify_token)]) async def create_app(app_data: Dict[str, Any]): """Create new app""" try: @@ -311,7 +312,7 @@ async def create_app(app_data: Dict[str, Any]): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@app.put("/api/admin/apps/{app_id}", dependencies=[Depends(verify_token)]) +@router.put("/admin/apps/{app_id}", dependencies=[Depends(verify_token)]) async def update_app(app_id: int, app_data: Dict[str, Any]): """Update app""" try: @@ -329,7 +330,7 @@ async def update_app(app_id: int, app_data: Dict[str, Any]): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@app.delete("/api/admin/apps/{app_id}", dependencies=[Depends(verify_token)]) +@router.delete("/admin/apps/{app_id}", dependencies=[Depends(verify_token)]) async def delete_app(app_id: int): """Delete app""" cursor = db.conn.cursor() @@ -338,7 +339,7 @@ async def delete_app(app_id: int): return {"message": "App deleted"} # Articles CRUD -@app.post("/api/admin/articles", dependencies=[Depends(verify_token)]) +@router.post("/admin/articles", dependencies=[Depends(verify_token)]) async def create_article(article_data: Dict[str, Any]): """Create new article""" try: @@ -356,7 +357,7 @@ async def create_article(article_data: Dict[str, Any]): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@app.put("/api/admin/articles/{article_id}", dependencies=[Depends(verify_token)]) +@router.put("/admin/articles/{article_id}", dependencies=[Depends(verify_token)]) async def update_article(article_id: int, article_data: Dict[str, Any]): """Update article""" try: @@ -373,7 +374,7 @@ async def update_article(article_id: int, article_data: Dict[str, Any]): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@app.delete("/api/admin/articles/{article_id}", dependencies=[Depends(verify_token)]) 
+@router.delete("/admin/articles/{article_id}", dependencies=[Depends(verify_token)]) async def delete_article(article_id: int): """Delete article""" cursor = db.conn.cursor() @@ -382,7 +383,7 @@ async def delete_article(article_id: int): return {"message": "Article deleted"} # Categories CRUD -@app.post("/api/admin/categories", dependencies=[Depends(verify_token)]) +@router.post("/admin/categories", dependencies=[Depends(verify_token)]) async def create_category(category_data: Dict[str, Any]): """Create new category""" try: @@ -399,7 +400,7 @@ async def create_category(category_data: Dict[str, Any]): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@app.put("/api/admin/categories/{cat_id}", dependencies=[Depends(verify_token)]) +@router.put("/admin/categories/{cat_id}", dependencies=[Depends(verify_token)]) async def update_category(cat_id: int, category_data: Dict[str, Any]): """Update category""" try: @@ -417,7 +418,7 @@ async def update_category(cat_id: int, category_data: Dict[str, Any]): raise HTTPException(status_code=400, detail=str(e)) -@app.delete("/api/admin/categories/{cat_id}", dependencies=[Depends(verify_token)]) +@router.delete("/admin/categories/{cat_id}", dependencies=[Depends(verify_token)]) async def delete_category(cat_id: int): """Delete category""" try: @@ -429,7 +430,7 @@ async def delete_category(cat_id: int): raise HTTPException(status_code=400, detail=str(e)) # Sponsors CRUD -@app.post("/api/admin/sponsors", dependencies=[Depends(verify_token)]) +@router.post("/admin/sponsors", dependencies=[Depends(verify_token)]) async def create_sponsor(sponsor_data: Dict[str, Any]): """Create new sponsor""" try: @@ -443,7 +444,7 @@ async def create_sponsor(sponsor_data: Dict[str, Any]): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@app.put("/api/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)]) +@router.put("/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)]) 
async def update_sponsor(sponsor_id: int, sponsor_data: Dict[str, Any]): """Update sponsor""" try: @@ -457,7 +458,7 @@ async def update_sponsor(sponsor_id: int, sponsor_data: Dict[str, Any]): raise HTTPException(status_code=400, detail=str(e)) -@app.delete("/api/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)]) +@router.delete("/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)]) async def delete_sponsor(sponsor_id: int): """Delete sponsor""" try: @@ -468,6 +469,9 @@ async def delete_sponsor(sponsor_id: int): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) +app.include_router(router) + + @app.get("/") async def root(): """API info""" @@ -475,12 +479,12 @@ async def root(): "name": "Crawl4AI Marketplace API", "version": "1.0.0", "endpoints": [ - "/api/apps", - "/api/articles", - "/api/categories", - "/api/sponsors", - "/api/search?q=query", - "/api/stats" + "/marketplace/api/apps", + "/marketplace/api/articles", + "/marketplace/api/categories", + "/marketplace/api/sponsors", + "/marketplace/api/search?q=query", + "/marketplace/api/stats" ] } diff --git a/docs/md_v2/marketplace/frontend/app-detail.js b/docs/md_v2/marketplace/frontend/app-detail.js index d1b3b559..622f9c88 100644 --- a/docs/md_v2/marketplace/frontend/app-detail.js +++ b/docs/md_v2/marketplace/frontend/app-detail.js @@ -1,5 +1,5 @@ // App Detail Page JavaScript -const API_BASE = '/api'; +const API_BASE = '/marketplace/api'; class AppDetailPage { constructor() { diff --git a/docs/md_v2/marketplace/frontend/marketplace.js b/docs/md_v2/marketplace/frontend/marketplace.js index 94a401bf..df07257c 100644 --- a/docs/md_v2/marketplace/frontend/marketplace.js +++ b/docs/md_v2/marketplace/frontend/marketplace.js @@ -1,5 +1,5 @@ // Marketplace JS - Magazine Layout -const API_BASE = '/api'; +const API_BASE = '/marketplace/api'; const CACHE_TTL = 3600000; // 1 hour in ms class MarketplaceCache { diff --git a/docs/md_v2/marketplace/marketplace.js 
b/docs/md_v2/marketplace/marketplace.js index 84022c47..7813e3ba 100644 --- a/docs/md_v2/marketplace/marketplace.js +++ b/docs/md_v2/marketplace/marketplace.js @@ -3,9 +3,9 @@ const { API_BASE, API_ORIGIN } = (() => { const { hostname, port } = window.location; if ((hostname === 'localhost' || hostname === '127.0.0.1') && port === '8000') { const origin = 'http://127.0.0.1:8100'; - return { API_BASE: `${origin}/api`, API_ORIGIN: origin }; + return { API_BASE: `${origin}/marketplace/api`, API_ORIGIN: origin }; } - return { API_BASE: '/api', API_ORIGIN: '' }; + return { API_BASE: '/marketplace/api', API_ORIGIN: '' }; })(); const resolveAssetUrl = (path) => { From abe8a92561b008e611bcfa4c603adab4901fac3e Mon Sep 17 00:00:00 2001 From: unclecode Date: Sat, 11 Oct 2025 11:51:22 +0800 Subject: [PATCH 2/4] fix(marketplace): resolve app detail page routing and styling issues - Fixed JavaScript errors from missing HTML elements (install-code, usage-code, integration-code) - Added missing CSS classes for tabs, overview layout, sidebar, and integration content - Fixed tab navigation to display horizontally in single line - Added proper padding to tab content sections (removed from container, added to content) - Fixed tab selector from .nav-tab to .tab-btn to match HTML structure - Added sidebar styling with stats grid and metadata display - Improved responsive design with mobile-friendly tab scrolling - Fixed code block positioning for copy buttons - Removed margin from first headings to prevent extra spacing - Added null checks for DOM elements in JavaScript to prevent errors These changes resolve the routing issue where clicking on apps caused page redirects, and fix the broken layout where CSS was not properly applied to the app detail page. 
--- docs/md_v2/marketplace/app-detail.css | 214 +++++++++++++++++- docs/md_v2/marketplace/app-detail.html | 209 +++++++++++++++++ docs/md_v2/marketplace/app-detail.js | 44 +++- docs/md_v2/marketplace/frontend/app-detail.js | 12 +- 4 files changed, 459 insertions(+), 20 deletions(-) create mode 100644 docs/md_v2/marketplace/app-detail.html diff --git a/docs/md_v2/marketplace/app-detail.css b/docs/md_v2/marketplace/app-detail.css index 9f04c13a..590bea03 100644 --- a/docs/md_v2/marketplace/app-detail.css +++ b/docs/md_v2/marketplace/app-detail.css @@ -197,6 +197,41 @@ } /* Navigation Tabs */ +.tabs { + display: flex; + flex-direction: row; + gap: 0; + border-bottom: 2px solid var(--border-color); + margin-bottom: 0; + background: var(--bg-tertiary); +} + +.tab-btn { + padding: 1rem 2rem; + background: transparent; + border: none; + border-bottom: 3px solid transparent; + color: var(--text-secondary); + cursor: pointer; + transition: all 0.2s; + font-family: inherit; + font-size: 0.95rem; + margin-bottom: -2px; + white-space: nowrap; + font-weight: 500; +} + +.tab-btn:hover { + color: var(--primary-cyan); + background: rgba(80, 255, 255, 0.05); +} + +.tab-btn.active { + color: var(--primary-cyan); + border-bottom-color: var(--primary-cyan); + background: var(--bg-secondary); +} + .app-nav { max-width: 1800px; margin: 2rem auto 0; @@ -228,34 +263,167 @@ border-bottom-color: var(--primary-cyan); } -/* Content Sections */ -.app-content { +/* Main Content Wrapper */ +.app-main { max-width: 1800px; margin: 2rem auto; padding: 0 2rem; } +/* Content Sections */ +.app-content { + background: var(--bg-secondary); + border: 1px solid var(--border-color); + padding: 0; +} + .tab-content { display: none; + padding: 2rem; } .tab-content.active { display: block; } -.docs-content { - max-width: 1200px; - padding: 2rem; +/* Overview Layout */ +.overview-columns { + display: grid; + grid-template-columns: 2fr 1fr; + gap: 2rem; +} + +.overview-main h2, .overview-main h3 { + color: 
var(--primary-cyan); + margin-top: 2rem; + margin-bottom: 1rem; +} + +.overview-main h2:first-child { + margin-top: 0; +} + +.overview-main h2 { + font-size: 1.8rem; + border-bottom: 2px solid var(--border-color); + padding-bottom: 0.5rem; +} + +.overview-main h3 { + font-size: 1.3rem; +} + +.features-list { + list-style: none; + padding: 0; +} + +.features-list li { + padding: 0.5rem 0; + padding-left: 1.5rem; + position: relative; + color: var(--text-secondary); +} + +.features-list li:before { + content: "▸"; + position: absolute; + left: 0; + color: var(--primary-cyan); +} + +.use-cases p { + color: var(--text-secondary); + line-height: 1.6; +} + +/* Sidebar */ +.sidebar { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.sidebar-card { background: var(--bg-secondary); border: 1px solid var(--border-color); + padding: 1.5rem; +} + +.sidebar-card h3 { + font-size: 1.1rem; + color: var(--primary-cyan); + margin: 0 0 1rem 0; + border-bottom: 1px solid var(--border-color); + padding-bottom: 0.5rem; +} + +.stats-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 1rem; +} + +.stats-grid > div { + text-align: center; +} + +.metadata { + margin: 0; +} + +.metadata div { + display: flex; + justify-content: space-between; + padding: 0.75rem 0; + border-bottom: 1px solid var(--border-color); +} + +.metadata dt { + color: var(--text-tertiary); + font-weight: normal; +} + +.metadata dd { + color: var(--text-primary); + margin: 0; + font-weight: 600; +} + +.sidebar-card p { + color: var(--text-secondary); + margin: 0; +} + +/* Integration Content */ +.integration-content { + max-width: 100%; +} + +.integration-content h2 { + font-size: 1.8rem; + color: var(--primary-cyan); + margin: 0 0 2rem 0; + padding-bottom: 0.5rem; + border-bottom: 2px solid var(--border-color); +} + +.integration-content h3 { + font-size: 1.3rem; + color: var(--text-primary); + margin: 2rem 0 1rem; +} + +.docs-content { + max-width: 100%; } .docs-content h2 { font-size: 
1.8rem; color: var(--primary-cyan); - margin-bottom: 1rem; + margin: 0 0 1.5rem 0; padding-bottom: 0.5rem; - border-bottom: 1px solid var(--border-color); + border-bottom: 2px solid var(--border-color); } .docs-content h3 { @@ -290,6 +458,7 @@ border: 1px solid var(--border-color); margin: 1rem 0; overflow: hidden; + position: relative; } .code-header { @@ -308,18 +477,23 @@ } .copy-btn { - padding: 0.25rem 0.5rem; - background: transparent; + position: absolute; + top: 0.5rem; + right: 0.5rem; + padding: 0.4rem 0.8rem; + background: var(--bg-tertiary); border: 1px solid var(--border-color); color: var(--text-secondary); cursor: pointer; font-size: 0.75rem; transition: all 0.2s; + z-index: 10; } .copy-btn:hover { border-color: var(--primary-cyan); color: var(--primary-cyan); + background: var(--bg-secondary); } .code-block pre { @@ -435,6 +609,10 @@ .app-stats { justify-content: space-around; } + + .overview-columns { + grid-template-columns: 1fr; + } } @media (max-width: 768px) { @@ -446,6 +624,16 @@ flex-direction: column; } + .tabs { + overflow-x: auto; + -webkit-overflow-scrolling: touch; + } + + .tab-btn { + padding: 0.75rem 1.5rem; + font-size: 0.875rem; + } + .app-nav { overflow-x: auto; gap: 0; @@ -459,4 +647,12 @@ .support-grid { grid-template-columns: 1fr; } + + .tab-content { + padding: 1rem; + } + + .app-main { + padding: 0 1rem; + } } \ No newline at end of file diff --git a/docs/md_v2/marketplace/app-detail.html b/docs/md_v2/marketplace/app-detail.html new file mode 100644 index 00000000..ef1138a8 --- /dev/null +++ b/docs/md_v2/marketplace/app-detail.html @@ -0,0 +1,209 @@ + + + + + + App Details - Crawl4AI Marketplace + + + + +
+ +
+
+
+
+ +

+ [ + Marketplace + ] +

+
+
+ +
+
+ + +
+
+
+ +
+
+
+ Open Source + + +
+

App Name

+

App description goes here

+ +
+
+ ★★★★★ + Rating +
+
+ 0 + Downloads +
+
+ Category + Category +
+
+ + +
+
+
+ + +
+
+
+ + + + +
+ +
+
+
+

Overview

+
Overview content goes here.
+ +

Key Features

+
    +
  • Feature 1
  • +
  • Feature 2
  • +
  • Feature 3
  • +
+ +

Use Cases

+
+

Describe how this app can help your workflow.

+
+
+ + +
+
+ +
+
+

Integration Guide

+ +

Installation

+
+
# Installation instructions will appear here
+
+ +

Basic Usage

+
+
# Usage example will appear here
+
+ +

Complete Integration Example

+
+ +
# Complete integration guide will appear here
+
+
+
+ +
+
+

Documentation

+
+

Documentation coming soon.

+
+
+
+ +
+
+

Support

+
+
+

📧 Contact

+

contact@example.com

+
+
+

🐛 Report Issues

+

Found a bug? Report it on GitHub Issues.

+
+
+

💬 Community

+

Join our Discord for help and discussions.

+
+
+
+
+
+ +
+ + + +
+ + + + diff --git a/docs/md_v2/marketplace/app-detail.js b/docs/md_v2/marketplace/app-detail.js index 622f9c88..f470bf51 100644 --- a/docs/md_v2/marketplace/app-detail.js +++ b/docs/md_v2/marketplace/app-detail.js @@ -1,5 +1,15 @@ // App Detail Page JavaScript -const API_BASE = '/marketplace/api'; +const { API_BASE, API_ORIGIN } = (() => { + const { hostname, port, protocol } = window.location; + const isLocalHost = ['localhost', '127.0.0.1', '0.0.0.0'].includes(hostname); + + if (isLocalHost && port && port !== '8100') { + const origin = `${protocol}//127.0.0.1:8100`; + return { API_BASE: `${origin}/marketplace/api`, API_ORIGIN: origin }; + } + + return { API_BASE: '/marketplace/api', API_ORIGIN: '' }; +})(); class AppDetailPage { constructor() { @@ -70,7 +80,6 @@ class AppDetailPage { document.getElementById('app-description').textContent = this.appData.description; document.getElementById('app-type').textContent = this.appData.type || 'Open Source'; document.getElementById('app-category').textContent = this.appData.category; - document.getElementById('app-pricing').textContent = this.appData.pricing || 'Free'; // Badges if (this.appData.featured) { @@ -105,6 +114,15 @@ class AppDetailPage { // Contact document.getElementById('app-contact').textContent = this.appData.contact_email || 'Not available'; + // Sidebar info + document.getElementById('sidebar-downloads').textContent = this.formatNumber(this.appData.downloads || 0); + document.getElementById('sidebar-rating').textContent = (this.appData.rating || 0).toFixed(1); + document.getElementById('sidebar-category').textContent = this.appData.category || '-'; + document.getElementById('sidebar-type').textContent = this.appData.type || '-'; + document.getElementById('sidebar-status').textContent = this.appData.status || 'Active'; + document.getElementById('sidebar-pricing').textContent = this.appData.pricing || 'Free'; + document.getElementById('sidebar-contact').textContent = this.appData.contact_email || 
'contact@example.com'; + // Integration guide this.renderIntegrationGuide(); } @@ -112,24 +130,27 @@ class AppDetailPage { renderIntegrationGuide() { // Installation code const installCode = document.getElementById('install-code'); - if (this.appData.type === 'Open Source' && this.appData.github_url) { - installCode.textContent = `# Clone from GitHub + if (installCode) { + if (this.appData.type === 'Open Source' && this.appData.github_url) { + installCode.textContent = `# Clone from GitHub git clone ${this.appData.github_url} # Install dependencies pip install -r requirements.txt`; - } else if (this.appData.name.toLowerCase().includes('api')) { - installCode.textContent = `# Install via pip + } else if (this.appData.name.toLowerCase().includes('api')) { + installCode.textContent = `# Install via pip pip install ${this.appData.slug} # Or install from source pip install git+${this.appData.github_url || 'https://github.com/example/repo'}`; + } } // Usage code - customize based on category const usageCode = document.getElementById('usage-code'); - if (this.appData.category === 'Browser Automation') { - usageCode.textContent = `from crawl4ai import AsyncWebCrawler + if (usageCode) { + if (this.appData.category === 'Browser Automation') { + usageCode.textContent = `from crawl4ai import AsyncWebCrawler from ${this.appData.slug.replace(/-/g, '_')} import ${this.appData.name.replace(/\s+/g, '')} async def main(): @@ -178,11 +199,13 @@ async with AsyncWebCrawler() as crawler: extraction_strategy=strategy ) print(result.extracted_content)`; + } } // Integration example const integrationCode = document.getElementById('integration-code'); - integrationCode.textContent = this.appData.integration_guide || + if (integrationCode) { + integrationCode.textContent = this.appData.integration_guide || `# Complete ${this.appData.name} Integration Example from crawl4ai import AsyncWebCrawler @@ -237,6 +260,7 @@ async def crawl_with_${this.appData.slug.replace(/-/g, '_')}(): if __name__ == 
"__main__": import asyncio asyncio.run(crawl_with_${this.appData.slug.replace(/-/g, '_')}())`; + } } formatNumber(num) { @@ -250,7 +274,7 @@ if __name__ == "__main__": setupEventListeners() { // Tab switching - const tabs = document.querySelectorAll('.nav-tab'); + const tabs = document.querySelectorAll('.tab-btn'); tabs.forEach(tab => { tab.addEventListener('click', () => { // Update active tab diff --git a/docs/md_v2/marketplace/frontend/app-detail.js b/docs/md_v2/marketplace/frontend/app-detail.js index 622f9c88..5bc86d2b 100644 --- a/docs/md_v2/marketplace/frontend/app-detail.js +++ b/docs/md_v2/marketplace/frontend/app-detail.js @@ -1,5 +1,15 @@ // App Detail Page JavaScript -const API_BASE = '/marketplace/api'; +const { API_BASE, API_ORIGIN } = (() => { + const { hostname, port, protocol } = window.location; + const isLocalHost = ['localhost', '127.0.0.1', '0.0.0.0'].includes(hostname); + + if (isLocalHost && port && port !== '8100') { + const origin = `${protocol}//127.0.0.1:8100`; + return { API_BASE: `${origin}/marketplace/api`, API_ORIGIN: origin }; + } + + return { API_BASE: '/marketplace/api', API_ORIGIN: '' }; +})(); class AppDetailPage { constructor() { From 216019f29afb7cf118535093af7f2fd83b9fa5d4 Mon Sep 17 00:00:00 2001 From: unclecode Date: Sat, 11 Oct 2025 12:52:04 +0800 Subject: [PATCH 3/4] fix(marketplace): prevent hero image overflow and secondary card stretching - Fixed hero image to 200px height with min/max constraints - Added object-fit: cover to hero-image img elements - Changed secondary-featured align-items from stretch to flex-start - Fixed secondary-card height to 118px (no flex: 1 stretching) - Updated responsive grid layouts for wider screens - Added flex: 1 to hero-content for better content distribution These changes ensure a rigid, predictable layout that prevents: 1. Large images from pushing text content down 2. 
Single secondary cards from stretching to fill entire height --- docs/md_v2/marketplace/marketplace.css | 34 +++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/docs/md_v2/marketplace/marketplace.css b/docs/md_v2/marketplace/marketplace.css index a95c4a41..57f54d22 100644 --- a/docs/md_v2/marketplace/marketplace.css +++ b/docs/md_v2/marketplace/marketplace.css @@ -231,7 +231,9 @@ a:hover { .hero-image { width: 100%; - height: 240px; + height: 200px; + min-height: 200px; + max-height: 200px; background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05)); background-size: cover; background-position: center; @@ -243,6 +245,14 @@ a:hover { flex-shrink: 0; position: relative; filter: brightness(1.1) contrast(1.1); + overflow: hidden; +} + +.hero-image img { + width: 100%; + height: 100%; + object-fit: cover; + object-position: center; } .hero-image::after { @@ -257,6 +267,10 @@ a:hover { .hero-content { padding: 1.5rem; + flex: 1; + display: flex; + flex-direction: column; + justify-content: space-between; } .hero-badge { @@ -301,9 +315,9 @@ a:hover { /* Secondary Featured */ .secondary-featured { grid-column: 1 / -1; - height: 380px; + min-height: 380px; display: flex; - align-items: stretch; + align-items: flex-start; } .featured-secondary-cards { @@ -311,7 +325,7 @@ a:hover { display: flex; flex-direction: column; gap: 0.75rem; - justify-content: space-between; + align-items: stretch; } .secondary-card { @@ -321,8 +335,10 @@ a:hover { transition: all 0.3s ease; display: flex; overflow: hidden; - height: calc((380px - 1.5rem) / 3); - flex: 1; + height: 118px; + min-height: 118px; + max-height: 118px; + flex-shrink: 0; box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3); } @@ -875,10 +891,13 @@ a:hover { .secondary-featured { grid-column: 3 / 5; grid-row: 1; + min-height: auto; } .featured-secondary-cards { + display: grid; grid-template-columns: repeat(2, 1fr); + flex-direction: unset; } .main-content { @@ -906,10 
+925,13 @@ a:hover { .secondary-featured { grid-column: 3 / 6; + min-height: auto; } .featured-secondary-cards { + display: grid; grid-template-columns: repeat(3, 1fr); + flex-direction: unset; } .sponsored-section { From a3f057e19fc60245ef16c9b8fb1639c7b1555fb2 Mon Sep 17 00:00:00 2001 From: ntohidi Date: Mon, 13 Oct 2025 12:34:08 +0800 Subject: [PATCH 4/4] feat: Add hooks utility for function-based hooks with Docker client integration. ref #1377 Add hooks_to_string() utility function that converts Python function objects to string representations for the Docker API, enabling developers to write hooks as regular Python functions instead of strings. Core Changes: - New hooks_to_string() utility in crawl4ai/utils.py using inspect.getsource() - Docker client now accepts both function objects and strings for hooks - Automatic detection and conversion in Crawl4aiDockerClient._prepare_request() - New hooks and hooks_timeout parameters in client.crawl() method Documentation: - Docker client examples with function-based hooks (docs/examples/docker_client_hooks_example.py) - Updated main Docker deployment guide with comprehensive hooks section - Added unit tests for hooks utility (tests/docker/test_hooks_utility.py) --- crawl4ai/__init__.py | 4 +- crawl4ai/docker_client.py | 75 ++- crawl4ai/utils.py | 51 +- docs/examples/docker_client_hooks_example.py | 522 +++++++++++++++++++ docs/md_v2/core/docker-deployment.md | 397 ++++++++++++-- tests/docker/test_hooks_utility.py | 193 +++++++ 6 files changed, 1198 insertions(+), 44 deletions(-) create mode 100644 docs/examples/docker_client_hooks_example.py create mode 100644 tests/docker/test_hooks_utility.py diff --git a/crawl4ai/__init__.py b/crawl4ai/__init__.py index 6917f27e..8f1fdef4 100644 --- a/crawl4ai/__init__.py +++ b/crawl4ai/__init__.py @@ -103,7 +103,8 @@ from .browser_adapter import ( from .utils import ( start_colab_display_server, - setup_colab_environment + setup_colab_environment, + hooks_to_string ) __all__ = [ 
@@ -183,6 +184,7 @@ __all__ = [ "ProxyConfig", "start_colab_display_server", "setup_colab_environment", + "hooks_to_string", # C4A Script additions "c4a_compile", "c4a_validate", diff --git a/crawl4ai/docker_client.py b/crawl4ai/docker_client.py index 4e33431f..969fee7c 100644 --- a/crawl4ai/docker_client.py +++ b/crawl4ai/docker_client.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union, AsyncGenerator, Dict, Any +from typing import List, Optional, Union, AsyncGenerator, Dict, Any, Callable import httpx import json from urllib.parse import urljoin @@ -7,6 +7,7 @@ import asyncio from .async_configs import BrowserConfig, CrawlerRunConfig from .models import CrawlResult from .async_logger import AsyncLogger, LogLevel +from .utils import hooks_to_string class Crawl4aiClientError(Exception): @@ -70,17 +71,41 @@ class Crawl4aiDockerClient: self.logger.error(f"Server unreachable: {str(e)}", tag="ERROR") raise ConnectionError(f"Cannot connect to server: {str(e)}") - def _prepare_request(self, urls: List[str], browser_config: Optional[BrowserConfig] = None, - crawler_config: Optional[CrawlerRunConfig] = None) -> Dict[str, Any]: + def _prepare_request( + self, + urls: List[str], + browser_config: Optional[BrowserConfig] = None, + crawler_config: Optional[CrawlerRunConfig] = None, + hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None, + hooks_timeout: int = 30 + ) -> Dict[str, Any]: """Prepare request data from configs.""" if self._token: self._http_client.headers["Authorization"] = f"Bearer {self._token}" - return { + + request_data = { "urls": urls, "browser_config": browser_config.dump() if browser_config else {}, "crawler_config": crawler_config.dump() if crawler_config else {} } + # Handle hooks if provided + if hooks: + # Check if hooks are already strings or need conversion + if any(callable(v) for v in hooks.values()): + # Convert function objects to strings + hooks_code = hooks_to_string(hooks) + else: + # Already in string format + 
hooks_code = hooks + + request_data["hooks"] = { + "code": hooks_code, + "timeout": hooks_timeout + } + + return request_data + async def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response: """Make an HTTP request with error handling.""" url = urljoin(self.base_url, endpoint) @@ -102,16 +127,42 @@ class Crawl4aiDockerClient: self, urls: List[str], browser_config: Optional[BrowserConfig] = None, - crawler_config: Optional[CrawlerRunConfig] = None + crawler_config: Optional[CrawlerRunConfig] = None, + hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None, + hooks_timeout: int = 30 ) -> Union[CrawlResult, List[CrawlResult], AsyncGenerator[CrawlResult, None]]: - """Execute a crawl operation.""" + """ + Execute a crawl operation. + + Args: + urls: List of URLs to crawl + browser_config: Browser configuration + crawler_config: Crawler configuration + hooks: Optional hooks - can be either: + - Dict[str, Callable]: Function objects that will be converted to strings + - Dict[str, str]: Already stringified hook code + hooks_timeout: Timeout in seconds for each hook execution (1-120) + + Returns: + Single CrawlResult, list of results, or async generator for streaming + + Example with function hooks: + >>> async def my_hook(page, context, **kwargs): + ... await page.set_viewport_size({"width": 1920, "height": 1080}) + ... return page + >>> + >>> result = await client.crawl( + ... ["https://example.com"], + ... hooks={"on_page_context_created": my_hook} + ... 
) + """ await self._check_server() - - data = self._prepare_request(urls, browser_config, crawler_config) + + data = self._prepare_request(urls, browser_config, crawler_config, hooks, hooks_timeout) is_streaming = crawler_config and crawler_config.stream - + self.logger.info(f"Crawling {len(urls)} URLs {'(streaming)' if is_streaming else ''}", tag="CRAWL") - + if is_streaming: async def stream_results() -> AsyncGenerator[CrawlResult, None]: async with self._http_client.stream("POST", f"{self.base_url}/crawl/stream", json=data) as response: @@ -128,12 +179,12 @@ class Crawl4aiDockerClient: else: yield CrawlResult(**result) return stream_results() - + response = await self._request("POST", "/crawl", json=data) result_data = response.json() if not result_data.get("success", False): raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}") - + results = [CrawlResult(**r) for r in result_data.get("results", [])] self.logger.success(f"Crawl completed with {len(results)} results", tag="CRAWL") return results[0] if len(results) == 1 else results diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index 046351e7..bbd7ffa2 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -47,6 +47,7 @@ from urllib.parse import ( urljoin, urlparse, urlunparse, parse_qsl, urlencode, quote, unquote ) +import inspect # Monkey patch to fix wildcard handling in urllib.robotparser @@ -3529,4 +3530,52 @@ def get_memory_stats() -> Tuple[float, float, float]: available_gb = get_true_available_memory_gb() used_percent = get_true_memory_usage_percent() - return used_percent, available_gb, total_gb \ No newline at end of file + return used_percent, available_gb, total_gb + + +# Hook utilities for Docker API +def hooks_to_string(hooks: Dict[str, Callable]) -> Dict[str, str]: + """ + Convert hook function objects to string representations for Docker API. 
+ + This utility simplifies the process of using hooks with the Docker API by converting + Python function objects into the string format required by the API. + + Args: + hooks: Dictionary mapping hook point names to Python function objects. + Functions should be async and follow hook signature requirements. + + Returns: + Dictionary mapping hook point names to string representations of the functions. + + Example: + >>> async def my_hook(page, context, **kwargs): + ... await page.set_viewport_size({"width": 1920, "height": 1080}) + ... return page + >>> + >>> hooks_dict = {"on_page_context_created": my_hook} + >>> api_hooks = hooks_to_string(hooks_dict) + >>> # api_hooks is now ready to use with Docker API + + Raises: + ValueError: If a hook is not callable or source cannot be extracted + """ + result = {} + + for hook_name, hook_func in hooks.items(): + if not callable(hook_func): + raise ValueError(f"Hook '{hook_name}' must be a callable function, got {type(hook_func)}") + + try: + # Get the source code of the function + source = inspect.getsource(hook_func) + # Remove any leading indentation to get clean source + source = textwrap.dedent(source) + result[hook_name] = source + except (OSError, TypeError) as e: + raise ValueError( + f"Cannot extract source code for hook '{hook_name}'. " + f"Make sure the function is defined in a file (not interactively). Error: {e}" + ) + + return result diff --git a/docs/examples/docker_client_hooks_example.py b/docs/examples/docker_client_hooks_example.py new file mode 100644 index 00000000..1aa27fdc --- /dev/null +++ b/docs/examples/docker_client_hooks_example.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +""" +Comprehensive hooks examples using Docker Client with function objects. 
+ +This approach is recommended because: +- Write hooks as regular Python functions +- Full IDE support (autocomplete, type checking) +- Automatic conversion to API format +- Reusable and testable code +- Clean, readable syntax +""" + +import asyncio +from crawl4ai import Crawl4aiDockerClient + +# API_BASE_URL = "http://localhost:11235" +API_BASE_URL = "http://localhost:11234" + + +# ============================================================================ +# Hook Function Definitions +# ============================================================================ + +# --- All Hooks Demo --- +async def browser_created_hook(browser, **kwargs): + """Called after browser is created""" + print("[HOOK] Browser created and ready") + return browser + + +async def page_context_hook(page, context, **kwargs): + """Setup page environment""" + print("[HOOK] Setting up page environment") + + # Set viewport + await page.set_viewport_size({"width": 1920, "height": 1080}) + + # Add cookies + await context.add_cookies([{ + "name": "test_session", + "value": "abc123xyz", + "domain": ".httpbin.org", + "path": "/" + }]) + + # Block resources + await context.route("**/*.{png,jpg,jpeg,gif}", lambda route: route.abort()) + await context.route("**/analytics/*", lambda route: route.abort()) + + print("[HOOK] Environment configured") + return page + + +async def user_agent_hook(page, context, user_agent, **kwargs): + """Called when user agent is updated""" + print(f"[HOOK] User agent: {user_agent[:50]}...") + return page + + +async def before_goto_hook(page, context, url, **kwargs): + """Called before navigating to URL""" + print(f"[HOOK] Navigating to: {url}") + + await page.set_extra_http_headers({ + "X-Custom-Header": "crawl4ai-test", + "Accept-Language": "en-US" + }) + + return page + + +async def after_goto_hook(page, context, url, response, **kwargs): + """Called after page loads""" + print(f"[HOOK] Page loaded: {url}") + + await page.wait_for_timeout(1000) + + try: + await 
page.wait_for_selector("body", timeout=2000) + print("[HOOK] Body element ready") + except: + print("[HOOK] Timeout, continuing") + + return page + + +async def execution_started_hook(page, context, **kwargs): + """Called when custom JS execution starts""" + print("[HOOK] JS execution started") + await page.evaluate("console.log('[HOOK] Custom JS');") + return page + + +async def before_retrieve_hook(page, context, **kwargs): + """Called before retrieving HTML""" + print("[HOOK] Preparing HTML retrieval") + + # Scroll for lazy content + await page.evaluate("window.scrollTo(0, document.body.scrollHeight);") + await page.wait_for_timeout(500) + await page.evaluate("window.scrollTo(0, 0);") + + print("[HOOK] Scrolling complete") + return page + + +async def before_return_hook(page, context, html, **kwargs): + """Called before returning HTML""" + print(f"[HOOK] HTML ready: {len(html)} chars") + + metrics = await page.evaluate('''() => ({ + images: document.images.length, + links: document.links.length, + scripts: document.scripts.length + })''') + + print(f"[HOOK] Metrics - Images: {metrics['images']}, Links: {metrics['links']}") + return page + + +# --- Authentication Hooks --- +async def auth_context_hook(page, context, **kwargs): + """Setup authentication context""" + print("[HOOK] Setting up authentication") + + # Add auth cookies + await context.add_cookies([{ + "name": "auth_token", + "value": "fake_jwt_token", + "domain": ".httpbin.org", + "path": "/", + "httpOnly": True + }]) + + # Set localStorage + await page.evaluate(''' + localStorage.setItem('user_id', '12345'); + localStorage.setItem('auth_time', new Date().toISOString()); + ''') + + print("[HOOK] Auth context ready") + return page + + +async def auth_headers_hook(page, context, url, **kwargs): + """Add authentication headers""" + print(f"[HOOK] Adding auth headers for {url}") + + import base64 + credentials = base64.b64encode(b"user:passwd").decode('ascii') + + await page.set_extra_http_headers({ + 
'Authorization': f'Basic {credentials}', + 'X-API-Key': 'test-key-123' + }) + + return page + + +# --- Performance Optimization Hooks --- +async def performance_hook(page, context, **kwargs): + """Optimize page for performance""" + print("[HOOK] Optimizing for performance") + + # Block resource-heavy content + await context.route("**/*.{png,jpg,jpeg,gif,webp,svg}", lambda r: r.abort()) + await context.route("**/*.{woff,woff2,ttf}", lambda r: r.abort()) + await context.route("**/*.{mp4,webm,ogg}", lambda r: r.abort()) + await context.route("**/googletagmanager.com/*", lambda r: r.abort()) + await context.route("**/google-analytics.com/*", lambda r: r.abort()) + await context.route("**/facebook.com/*", lambda r: r.abort()) + + # Disable animations + await page.add_style_tag(content=''' + *, *::before, *::after { + animation-duration: 0s !important; + transition-duration: 0s !important; + } + ''') + + print("[HOOK] Optimizations applied") + return page + + +async def cleanup_hook(page, context, **kwargs): + """Clean page before extraction""" + print("[HOOK] Cleaning page") + + await page.evaluate('''() => { + const selectors = [ + '.ad', '.ads', '.advertisement', + '.popup', '.modal', '.overlay', + '.cookie-banner', '.newsletter' + ]; + + selectors.forEach(sel => { + document.querySelectorAll(sel).forEach(el => el.remove()); + }); + + document.querySelectorAll('script, style').forEach(el => el.remove()); + }''') + + print("[HOOK] Page cleaned") + return page + + +# --- Content Extraction Hooks --- +async def wait_dynamic_content_hook(page, context, url, response, **kwargs): + """Wait for dynamic content to load""" + print(f"[HOOK] Waiting for dynamic content on {url}") + + await page.wait_for_timeout(2000) + + # Click "Load More" if exists + try: + load_more = await page.query_selector('[class*="load-more"], button:has-text("Load More")') + if load_more: + await load_more.click() + await page.wait_for_timeout(1000) + print("[HOOK] Clicked 'Load More'") + except: + 
pass + + return page + + +async def extract_metadata_hook(page, context, **kwargs): + """Extract page metadata""" + print("[HOOK] Extracting metadata") + + metadata = await page.evaluate('''() => { + const getMeta = (name) => { + const el = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`); + return el ? el.getAttribute('content') : null; + }; + + return { + title: document.title, + description: getMeta('description'), + author: getMeta('author'), + keywords: getMeta('keywords'), + }; + }''') + + print(f"[HOOK] Metadata: {metadata}") + + # Infinite scroll + for i in range(3): + await page.evaluate("window.scrollTo(0, document.body.scrollHeight);") + await page.wait_for_timeout(1000) + print(f"[HOOK] Scroll {i+1}/3") + + return page + + +# --- Multi-URL Hooks --- +async def url_specific_hook(page, context, url, **kwargs): + """Apply URL-specific logic""" + print(f"[HOOK] Processing URL: {url}") + + # URL-specific headers + if 'html' in url: + await page.set_extra_http_headers({"X-Type": "HTML"}) + elif 'json' in url: + await page.set_extra_http_headers({"X-Type": "JSON"}) + + return page + + +async def track_progress_hook(page, context, url, response, **kwargs): + """Track crawl progress""" + status = response.status if response else 'unknown' + print(f"[HOOK] Loaded {url} - Status: {status}") + return page + + +# ============================================================================ +# Test Functions +# ============================================================================ + +async def test_all_hooks_comprehensive(): + """Test all 8 hook types""" + print("=" * 70) + print("Test 1: All Hooks Comprehensive Demo (Docker Client)") + print("=" * 70) + + async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client: + print("\nCrawling with all 8 hooks...") + + # Define hooks with function objects + hooks = { + "on_browser_created": browser_created_hook, + "on_page_context_created": page_context_hook, + 
"on_user_agent_updated": user_agent_hook, + "before_goto": before_goto_hook, + "after_goto": after_goto_hook, + "on_execution_started": execution_started_hook, + "before_retrieve_html": before_retrieve_hook, + "before_return_html": before_return_hook + } + + result = await client.crawl( + ["https://httpbin.org/html"], + hooks=hooks, + hooks_timeout=30 + ) + + print("\n✅ Success!") + print(f" URL: {result.url}") + print(f" Success: {result.success}") + print(f" HTML: {len(result.html)} chars") + + +async def test_authentication_workflow(): + """Test authentication with hooks""" + print("\n" + "=" * 70) + print("Test 2: Authentication Workflow (Docker Client)") + print("=" * 70) + + async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client: + print("\nTesting authentication...") + + hooks = { + "on_page_context_created": auth_context_hook, + "before_goto": auth_headers_hook + } + + result = await client.crawl( + ["https://httpbin.org/basic-auth/user/passwd"], + hooks=hooks, + hooks_timeout=15 + ) + + print("\n✅ Authentication completed") + + if result.success: + if '"authenticated"' in result.html and 'true' in result.html: + print(" ✅ Basic auth successful!") + else: + print(" ⚠️ Auth status unclear") + else: + print(f" ❌ Failed: {result.error_message}") + + +async def test_performance_optimization(): + """Test performance optimization""" + print("\n" + "=" * 70) + print("Test 3: Performance Optimization (Docker Client)") + print("=" * 70) + + async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client: + print("\nTesting performance hooks...") + + hooks = { + "on_page_context_created": performance_hook, + "before_retrieve_html": cleanup_hook + } + + result = await client.crawl( + ["https://httpbin.org/html"], + hooks=hooks, + hooks_timeout=10 + ) + + print("\n✅ Optimization completed") + print(f" HTML size: {len(result.html):,} chars") + print(" Resources blocked, ads removed") + + +async def test_content_extraction(): + 
"""Test content extraction""" + print("\n" + "=" * 70) + print("Test 4: Content Extraction (Docker Client)") + print("=" * 70) + + async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client: + print("\nTesting extraction hooks...") + + hooks = { + "after_goto": wait_dynamic_content_hook, + "before_retrieve_html": extract_metadata_hook + } + + result = await client.crawl( + ["https://www.kidocode.com/"], + hooks=hooks, + hooks_timeout=20 + ) + + print("\n✅ Extraction completed") + print(f" URL: {result.url}") + print(f" Success: {result.success}") + print(f" Metadata: {result.metadata}") + + +async def test_multi_url_crawl(): + """Test hooks with multiple URLs""" + print("\n" + "=" * 70) + print("Test 5: Multi-URL Crawl (Docker Client)") + print("=" * 70) + + async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client: + print("\nCrawling multiple URLs...") + + hooks = { + "before_goto": url_specific_hook, + "after_goto": track_progress_hook + } + + results = await client.crawl( + [ + "https://httpbin.org/html", + "https://httpbin.org/json", + "https://httpbin.org/xml" + ], + hooks=hooks, + hooks_timeout=15 + ) + + print("\n✅ Multi-URL crawl completed") + print(f"\n Crawled {len(results)} URLs:") + for i, result in enumerate(results, 1): + status = "✅" if result.success else "❌" + print(f" {status} {i}. 
{result.url}") + + +async def test_reusable_hook_library(): + """Test using reusable hook library""" + print("\n" + "=" * 70) + print("Test 6: Reusable Hook Library (Docker Client)") + print("=" * 70) + + # Create a library of reusable hooks + class HookLibrary: + @staticmethod + async def block_images(page, context, **kwargs): + """Block all images""" + await context.route("**/*.{png,jpg,jpeg,gif}", lambda r: r.abort()) + print("[LIBRARY] Images blocked") + return page + + @staticmethod + async def block_analytics(page, context, **kwargs): + """Block analytics""" + await context.route("**/analytics/*", lambda r: r.abort()) + await context.route("**/google-analytics.com/*", lambda r: r.abort()) + print("[LIBRARY] Analytics blocked") + return page + + @staticmethod + async def scroll_infinite(page, context, **kwargs): + """Handle infinite scroll""" + for i in range(5): + prev = await page.evaluate("document.body.scrollHeight") + await page.evaluate("window.scrollTo(0, document.body.scrollHeight);") + await page.wait_for_timeout(1000) + curr = await page.evaluate("document.body.scrollHeight") + if curr == prev: + break + print("[LIBRARY] Infinite scroll complete") + return page + + async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client: + print("\nUsing hook library...") + + hooks = { + "on_page_context_created": HookLibrary.block_images, + "before_retrieve_html": HookLibrary.scroll_infinite + } + + result = await client.crawl( + ["https://www.kidocode.com/"], + hooks=hooks, + hooks_timeout=20 + ) + + print("\n✅ Library hooks completed") + print(f" Success: {result.success}") + + +# ============================================================================ +# Main +# ============================================================================ + +async def main(): + """Run all Docker client hook examples""" + print("🔧 Crawl4AI Docker Client - Hooks Examples (Function-Based)") + print("Using Python function objects with automatic conversion") 
+ print("=" * 70) + + tests = [ + ("All Hooks Demo", test_all_hooks_comprehensive), + ("Authentication", test_authentication_workflow), + ("Performance", test_performance_optimization), + ("Extraction", test_content_extraction), + ("Multi-URL", test_multi_url_crawl), + ("Hook Library", test_reusable_hook_library) + ] + + for i, (name, test_func) in enumerate(tests, 1): + try: + await test_func() + print(f"\n✅ Test {i}/{len(tests)}: {name} completed\n") + except Exception as e: + print(f"\n❌ Test {i}/{len(tests)}: {name} failed: {e}\n") + import traceback + traceback.print_exc() + + print("=" * 70) + print("🎉 All Docker client hook examples completed!") + print("\n💡 Key Benefits of Function-Based Hooks:") + print(" • Write as regular Python functions") + print(" • Full IDE support (autocomplete, types)") + print(" • Automatic conversion to API format") + print(" • Reusable across projects") + print(" • Clean, readable code") + print(" • Easy to test and debug") + print("=" * 70) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/md_v2/core/docker-deployment.md b/docs/md_v2/core/docker-deployment.md index ea3692b2..36bf28e1 100644 --- a/docs/md_v2/core/docker-deployment.md +++ b/docs/md_v2/core/docker-deployment.md @@ -6,18 +6,6 @@ - [Option 1: Using Pre-built Docker Hub Images (Recommended)](#option-1-using-pre-built-docker-hub-images-recommended) - [Option 2: Using Docker Compose](#option-2-using-docker-compose) - [Option 3: Manual Local Build & Run](#option-3-manual-local-build--run) -- [Dockerfile Parameters](#dockerfile-parameters) -- [Using the API](#using-the-api) - - [Playground Interface](#playground-interface) - - [Python SDK](#python-sdk) - - [Understanding Request Schema](#understanding-request-schema) - - [REST API Examples](#rest-api-examples) -- [Additional API Endpoints](#additional-api-endpoints) - - [HTML Extraction Endpoint](#html-extraction-endpoint) - - [Screenshot Endpoint](#screenshot-endpoint) - - [PDF Export 
Endpoint](#pdf-export-endpoint) - - [JavaScript Execution Endpoint](#javascript-execution-endpoint) - - [Library Context Endpoint](#library-context-endpoint) - [MCP (Model Context Protocol) Support](#mcp-model-context-protocol-support) - [What is MCP?](#what-is-mcp) - [Connecting via MCP](#connecting-via-mcp) @@ -25,9 +13,28 @@ - [Available MCP Tools](#available-mcp-tools) - [Testing MCP Connections](#testing-mcp-connections) - [MCP Schemas](#mcp-schemas) +- [Additional API Endpoints](#additional-api-endpoints) + - [HTML Extraction Endpoint](#html-extraction-endpoint) + - [Screenshot Endpoint](#screenshot-endpoint) + - [PDF Export Endpoint](#pdf-export-endpoint) + - [JavaScript Execution Endpoint](#javascript-execution-endpoint) +- [User-Provided Hooks API](#user-provided-hooks-api) + - [Hook Information Endpoint](#hook-information-endpoint) + - [Available Hook Points](#available-hook-points) + - [Using Hooks in Requests](#using-hooks-in-requests) + - [Hook Examples with Real URLs](#hook-examples-with-real-urls) + - [Security Best Practices](#security-best-practices) + - [Hook Response Information](#hook-response-information) + - [Error Handling](#error-handling) + - [Hooks Utility: Function-Based Approach (Python)](#hooks-utility-function-based-approach-python) +- [Dockerfile Parameters](#dockerfile-parameters) +- [Using the API](#using-the-api) + - [Playground Interface](#playground-interface) + - [Python SDK](#python-sdk) + - [Understanding Request Schema](#understanding-request-schema) + - [REST API Examples](#rest-api-examples) + - [LLM Configuration Examples](#llm-configuration-examples) - [Metrics & Monitoring](#metrics--monitoring) -- [Deployment Scenarios](#deployment-scenarios) -- [Complete Examples](#complete-examples) - [Server Configuration](#server-configuration) - [Understanding config.yml](#understanding-configyml) - [JWT Authentication](#jwt-authentication) @@ -832,6 +839,275 @@ else: > 💡 **Remember**: Always test your hooks on safe, known websites 
first before using them on production sites. Never crawl sites that you don't have permission to access or that might be malicious. +### Hooks Utility: Function-Based Approach (Python) + +For Python developers, Crawl4AI provides a more convenient way to work with hooks using the `hooks_to_string()` utility function and Docker client integration. + +#### Why Use Function-Based Hooks? + +**String-Based Approach (shown above)**: +```python +hooks_code = { + "on_page_context_created": """ +async def hook(page, context, **kwargs): + await page.set_viewport_size({"width": 1920, "height": 1080}) + return page +""" +} +``` + +**Function-Based Approach (recommended for Python)**: +```python +from crawl4ai import Crawl4aiDockerClient + +async def my_hook(page, context, **kwargs): + await page.set_viewport_size({"width": 1920, "height": 1080}) + return page + +async with Crawl4aiDockerClient(base_url="http://localhost:11235") as client: + result = await client.crawl( + ["https://example.com"], + hooks={"on_page_context_created": my_hook} + ) +``` + +**Benefits**: +- ✅ Write hooks as regular Python functions +- ✅ Full IDE support (autocomplete, syntax highlighting, type checking) +- ✅ Easy to test and debug +- ✅ Reusable hook libraries +- ✅ Automatic conversion to API format + +#### Using the Hooks Utility + +The `hooks_to_string()` utility converts Python function objects to the string format required by the API: + +```python +from crawl4ai import hooks_to_string + +# Define your hooks as functions +async def setup_hook(page, context, **kwargs): + await page.set_viewport_size({"width": 1920, "height": 1080}) + await context.add_cookies([{ + "name": "session", + "value": "token", + "domain": ".example.com" + }]) + return page + +async def scroll_hook(page, context, **kwargs): + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + return page + +# Convert to string format +hooks_dict = { + "on_page_context_created": setup_hook, + "before_retrieve_html": 
scroll_hook +} +hooks_string = hooks_to_string(hooks_dict) + +# Now use with REST API or Docker client +# hooks_string contains the string representations +``` + +#### Docker Client with Automatic Conversion + +The Docker client automatically detects and converts function objects: + +```python +from crawl4ai import Crawl4aiDockerClient + +async def auth_hook(page, context, **kwargs): + """Add authentication cookies""" + await context.add_cookies([{ + "name": "auth_token", + "value": "your_token", + "domain": ".example.com" + }]) + return page + +async def performance_hook(page, context, **kwargs): + """Block unnecessary resources""" + await context.route("**/*.{png,jpg,gif}", lambda r: r.abort()) + await context.route("**/analytics/*", lambda r: r.abort()) + return page + +async with Crawl4aiDockerClient(base_url="http://localhost:11235") as client: + # Pass functions directly - automatic conversion! + result = await client.crawl( + ["https://example.com"], + hooks={ + "on_page_context_created": performance_hook, + "before_goto": auth_hook + }, + hooks_timeout=30 # Optional timeout in seconds (1-120) + ) + + print(f"Success: {result.success}") + print(f"HTML: {len(result.html)} chars") +``` + +#### Creating Reusable Hook Libraries + +Build collections of reusable hooks: + +```python +# hooks_library.py +class CrawlHooks: + """Reusable hook collection for common crawling tasks""" + + @staticmethod + async def block_images(page, context, **kwargs): + """Block all images to speed up crawling""" + await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda r: r.abort()) + return page + + @staticmethod + async def block_analytics(page, context, **kwargs): + """Block analytics and tracking scripts""" + tracking_domains = [ + "**/google-analytics.com/*", + "**/googletagmanager.com/*", + "**/facebook.com/tr/*", + "**/doubleclick.net/*" + ] + for domain in tracking_domains: + await context.route(domain, lambda r: r.abort()) + return page + + @staticmethod + async def 
scroll_infinite(page, context, **kwargs): + """Handle infinite scroll to load more content""" + previous_height = 0 + for i in range(5): # Max 5 scrolls + current_height = await page.evaluate("document.body.scrollHeight") + if current_height == previous_height: + break + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await page.wait_for_timeout(1000) + previous_height = current_height + return page + + @staticmethod + async def wait_for_dynamic_content(page, context, url, response, **kwargs): + """Wait for dynamic content to load""" + await page.wait_for_timeout(2000) + try: + # Click "Load More" if present + load_more = await page.query_selector('[class*="load-more"]') + if load_more: + await load_more.click() + await page.wait_for_timeout(1000) + except: + pass + return page + +# Use in your application +from hooks_library import CrawlHooks +from crawl4ai import Crawl4aiDockerClient + +async def crawl_with_optimizations(url): + async with Crawl4aiDockerClient() as client: + result = await client.crawl( + [url], + hooks={ + "on_page_context_created": CrawlHooks.block_images, + "before_retrieve_html": CrawlHooks.scroll_infinite + } + ) + return result +``` + +#### Choosing the Right Approach + +| Approach | Best For | IDE Support | Language | +|----------|----------|-------------|----------| +| **String-based** | Non-Python clients, REST APIs, other languages | ❌ None | Any | +| **Function-based** | Python applications, local development | ✅ Full | Python only | +| **Docker Client** | Python apps with automatic conversion | ✅ Full | Python only | + +**Recommendation**: +- **Python applications**: Use Docker client with function objects (easiest) +- **Non-Python or REST API**: Use string-based hooks (most flexible) +- **Manual control**: Use `hooks_to_string()` utility (middle ground) + +#### Complete Example with Function Hooks + +```python +from crawl4ai import Crawl4aiDockerClient, BrowserConfig, CrawlerRunConfig, CacheMode + +# Define 
hooks as regular Python functions +async def setup_environment(page, context, **kwargs): + """Setup crawling environment""" + # Set viewport + await page.set_viewport_size({"width": 1920, "height": 1080}) + + # Block resources for speed + await context.route("**/*.{png,jpg,gif}", lambda r: r.abort()) + + # Add custom headers + await page.set_extra_http_headers({ + "Accept-Language": "en-US", + "X-Custom-Header": "Crawl4AI" + }) + + print("[HOOK] Environment configured") + return page + +async def extract_content(page, context, **kwargs): + """Extract and prepare content""" + # Scroll to load lazy content + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await page.wait_for_timeout(1000) + + # Extract metadata + metadata = await page.evaluate('''() => ({ + title: document.title, + links: document.links.length, + images: document.images.length + })''') + + print(f"[HOOK] Page metadata: {metadata}") + return page + +async def main(): + async with Crawl4aiDockerClient(base_url="http://localhost:11235", verbose=True) as client: + # Configure crawl + browser_config = BrowserConfig(headless=True) + crawler_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS) + + # Crawl with hooks + result = await client.crawl( + ["https://httpbin.org/html"], + browser_config=browser_config, + crawler_config=crawler_config, + hooks={ + "on_page_context_created": setup_environment, + "before_retrieve_html": extract_content + }, + hooks_timeout=30 + ) + + if result.success: + print(f"✅ Crawl successful!") + print(f" URL: {result.url}") + print(f" HTML: {len(result.html)} chars") + print(f" Markdown: {len(result.markdown)} chars") + else: + print(f"❌ Crawl failed: {result.error_message}") + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) +``` + +#### Additional Resources + +- **Comprehensive Examples**: See `/docs/examples/docker_client_hooks_example.py` for Python function-based examples +- **REST API Examples**: See
`/docs/examples/hooks_rest_api_example.py` for string-based examples +- **Comparison Guide**: See `/docs/examples/README_HOOKS.md` for detailed comparison +- **Utility Documentation**: See `/docs/hooks-utility-guide.md` for complete guide + --- ## Dockerfile Parameters @@ -892,10 +1168,12 @@ This is the easiest way to translate Python configuration to JSON requests when Install the SDK: `pip install crawl4ai` +The Python SDK provides a convenient way to interact with the Docker API, including **automatic hook conversion** when using function objects. + ```python import asyncio from crawl4ai.docker_client import Crawl4aiDockerClient -from crawl4ai import BrowserConfig, CrawlerRunConfig, CacheMode # Assuming you have crawl4ai installed +from crawl4ai import BrowserConfig, CrawlerRunConfig, CacheMode async def main(): # Point to the correct server port @@ -907,23 +1185,22 @@ async def main(): print("--- Running Non-Streaming Crawl ---") results = await client.crawl( ["https://httpbin.org/html"], - browser_config=BrowserConfig(headless=True), # Use library classes for config aid + browser_config=BrowserConfig(headless=True), crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS) ) - if results: # client.crawl returns None on failure - print(f"Non-streaming results success: {results.success}") - if results.success: - for result in results: # Iterate through the CrawlResultContainer - print(f"URL: {result.url}, Success: {result.success}") + if results: + print(f"Non-streaming results success: {results.success}") + if results.success: + for result in results: + print(f"URL: {result.url}, Success: {result.success}") else: print("Non-streaming crawl failed.") - # Example Streaming crawl print("\n--- Running Streaming Crawl ---") stream_config = CrawlerRunConfig(stream=True, cache_mode=CacheMode.BYPASS) try: - async for result in await client.crawl( # client.crawl returns an async generator for streaming + async for result in await client.crawl( 
["https://httpbin.org/html", "https://httpbin.org/links/5/0"], browser_config=BrowserConfig(headless=True), crawler_config=stream_config @@ -932,17 +1209,56 @@ async def main(): except Exception as e: print(f"Streaming crawl failed: {e}") + # Example with hooks (Python function objects) + print("\n--- Crawl with Hooks ---") + + async def my_hook(page, context, **kwargs): + """Custom hook to optimize performance""" + await page.set_viewport_size({"width": 1920, "height": 1080}) + await context.route("**/*.{png,jpg}", lambda r: r.abort()) + print("[HOOK] Page optimized") + return page + + result = await client.crawl( + ["https://httpbin.org/html"], + browser_config=BrowserConfig(headless=True), + crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS), + hooks={"on_page_context_created": my_hook}, # Pass function directly! + hooks_timeout=30 + ) + print(f"Crawl with hooks success: {result.success}") # Example Get schema print("\n--- Getting Schema ---") schema = await client.get_schema() - print(f"Schema received: {bool(schema)}") # Print whether schema was received + print(f"Schema received: {bool(schema)}") if __name__ == "__main__": asyncio.run(main()) ``` -*(SDK parameters like timeout, verify_ssl etc. 
remain the same)* +#### SDK Parameters + +The Docker client supports the following parameters: + +**Client Initialization**: +- `base_url` (str): URL of the Docker server (default: `http://localhost:8000`) +- `timeout` (float): Request timeout in seconds (default: 30.0) +- `verify_ssl` (bool): Verify SSL certificates (default: True) +- `verbose` (bool): Enable verbose logging (default: True) +- `log_file` (Optional[str]): Path to log file (default: None) + +**crawl() Method**: +- `urls` (List[str]): List of URLs to crawl +- `browser_config` (Optional[BrowserConfig]): Browser configuration +- `crawler_config` (Optional[CrawlerRunConfig]): Crawler configuration +- `hooks` (Optional[Dict]): Hook functions or strings - **automatically converts function objects!** +- `hooks_timeout` (int): Timeout for each hook execution in seconds (default: 30) + +**Returns**: +- Single URL: `CrawlResult` object +- Multiple URLs: `List[CrawlResult]` +- Streaming: `AsyncGenerator[CrawlResult]` ### Second Approach: Direct API Calls @@ -1352,19 +1668,40 @@ We're here to help you succeed with Crawl4AI! Here's how to get support: In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment: - Building and running the Docker container -- Configuring the environment +- Configuring the environment - Using the interactive playground for testing - Making API requests with proper typing -- Using the Python SDK +- Using the Python SDK with **automatic hook conversion** +- **Working with hooks** - both string-based (REST API) and function-based (Python SDK) - Leveraging specialized endpoints for screenshots, PDFs, and JavaScript execution - Connecting via the Model Context Protocol (MCP) - Monitoring your deployment -The new playground interface at `http://localhost:11235/playground` makes it much easier to test configurations and generate the corresponding JSON for API requests. 
+### Key Features -For AI application developers, the MCP integration allows tools like Claude Code to directly access Crawl4AI's capabilities without complex API handling. +**Hooks Support**: Crawl4AI offers two approaches for working with hooks: +- **String-based** (REST API): Works with any language, requires manual string formatting +- **Function-based** (Python SDK): Write hooks as regular Python functions with full IDE support and automatic conversion -Remember, the examples in the `examples` folder are your friends - they show real-world usage patterns that you can adapt for your needs. +**Playground Interface**: The built-in playground at `http://localhost:11235/playground` makes it easy to test configurations and generate corresponding JSON for API requests. + +**MCP Integration**: For AI application developers, the MCP integration allows tools like Claude Code to directly access Crawl4AI's capabilities without complex API handling. + +### Next Steps + +1. **Explore Examples**: Check out the comprehensive examples in: + - `/docs/examples/hooks_docker_client_example.py` - Python function-based hooks + - `/docs/examples/hooks_rest_api_example.py` - REST API string-based hooks + - `/docs/examples/README_HOOKS.md` - Comparison and guide + +2. **Read Documentation**: + - `/docs/hooks-utility-guide.md` - Complete hooks utility guide + - API documentation for detailed configuration options + +3. **Join the Community**: + - GitHub: Report issues and contribute + - Discord: Get help and share your experiences + - Documentation: Comprehensive guides and tutorials Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀 diff --git a/tests/docker/test_hooks_utility.py b/tests/docker/test_hooks_utility.py new file mode 100644 index 00000000..7c820e56 --- /dev/null +++ b/tests/docker/test_hooks_utility.py @@ -0,0 +1,193 @@ +""" +Test script demonstrating the hooks_to_string utility and Docker client integration. 
+""" +import asyncio +from crawl4ai import Crawl4aiDockerClient, hooks_to_string + + +# Define hook functions as regular Python functions +async def auth_hook(page, context, **kwargs): + """Add authentication cookies.""" + await context.add_cookies([{ + 'name': 'test_cookie', + 'value': 'test_value', + 'domain': '.httpbin.org', + 'path': '/' + }]) + return page + + +async def scroll_hook(page, context, **kwargs): + """Scroll to load lazy content.""" + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + await page.wait_for_timeout(1000) + return page + + +async def viewport_hook(page, context, **kwargs): + """Set custom viewport.""" + await page.set_viewport_size({"width": 1920, "height": 1080}) + return page + + +async def test_hooks_utility(): + """Test the hooks_to_string utility function.""" + print("=" * 60) + print("Testing hooks_to_string utility") + print("=" * 60) + + # Create hooks dictionary with function objects + hooks_dict = { + "on_page_context_created": auth_hook, + "before_retrieve_html": scroll_hook + } + + # Convert to string format + hooks_string = hooks_to_string(hooks_dict) + + print("\n✓ Successfully converted function objects to strings") + print(f"\n✓ Converted {len(hooks_string)} hooks:") + for hook_name in hooks_string.keys(): + print(f" - {hook_name}") + + print("\n✓ Preview of converted hook:") + print("-" * 60) + print(hooks_string["on_page_context_created"][:200] + "...") + print("-" * 60) + + return hooks_string + + +async def test_docker_client_with_functions(): + """Test Docker client with function objects (automatic conversion).""" + print("\n" + "=" * 60) + print("Testing Docker Client with Function Objects") + print("=" * 60) + + # Note: This requires a running Crawl4AI Docker server + # The block below is live code (not commented out). NOTE(review): docs use port 11235 - confirm 11234. + + async with Crawl4aiDockerClient(base_url="http://localhost:11234", verbose=True) as client: + # Pass function objects directly - they'll be converted automatically + 
result = await client.crawl( + ["https://httpbin.org/html"], + hooks={ + "on_page_context_created": auth_hook, + "before_retrieve_html": scroll_hook + }, + hooks_timeout=30 + ) + print(f"\n✓ Crawl successful: {result.success}") + print(f"✓ URL: {result.url}") + + print("\n✓ Docker client accepts function objects directly") + print("✓ Automatic conversion happens internally") + print("✓ No manual string formatting needed!") + + +async def test_docker_client_with_strings(): + """Test Docker client with pre-converted strings.""" + print("\n" + "=" * 60) + print("Testing Docker Client with String Hooks") + print("=" * 60) + + # Convert hooks to strings first + hooks_dict = { + "on_page_context_created": viewport_hook, + "before_retrieve_html": scroll_hook + } + hooks_string = hooks_to_string(hooks_dict) + + # Note: This requires a running Crawl4AI Docker server + # The block below is live code (not commented out). NOTE(review): docs use port 11235 - confirm 11234. + + async with Crawl4aiDockerClient(base_url="http://localhost:11234", verbose=True) as client: + # Pass string hooks - they'll be used as-is + result = await client.crawl( + ["https://httpbin.org/html"], + hooks=hooks_string, + hooks_timeout=30 + ) + print(f"\n✓ Crawl successful: {result.success}") + + print("\n✓ Docker client also accepts pre-converted strings") + print("✓ Backward compatible with existing code") + + +async def show_usage_patterns(): + """Show different usage patterns.""" + print("\n" + "=" * 60) + print("Usage Patterns") + print("=" * 60) + + print("\n1. Direct function usage (simplest):") + print("-" * 60) + print(""" + async def my_hook(page, context, **kwargs): + await page.set_viewport_size({"width": 1920, "height": 1080}) + return page + + result = await client.crawl( + ["https://example.com"], + hooks={"on_page_context_created": my_hook} + ) + """) + + print("\n2. 
Convert then use:") + print("-" * 60) + print(""" + hooks_dict = {"on_page_context_created": my_hook} + hooks_string = hooks_to_string(hooks_dict) + + result = await client.crawl( + ["https://example.com"], + hooks=hooks_string + ) + """) + + print("\n3. Manual string (backward compatible):") + print("-" * 60) + print(""" + hooks_string = { + "on_page_context_created": ''' +async def hook(page, context, **kwargs): + await page.set_viewport_size({"width": 1920, "height": 1080}) + return page +''' + } + + result = await client.crawl( + ["https://example.com"], + hooks=hooks_string + ) + """) + + +async def main(): + """Run the enabled checks (currently only test_docker_client_with_strings; the rest are commented out).""" + print("\n🚀 Crawl4AI Hooks Utility Test Suite\n") + + # Test the utility function + # await test_hooks_utility() + + # Show usage with Docker client + # await test_docker_client_with_functions() + await test_docker_client_with_strings() + + # Show different patterns + # await show_usage_patterns() + + # print("\n" + "=" * 60) + # print("✓ All tests completed successfully!") + # print("=" * 60) + # print("\nKey Benefits:") + # print(" • Write hooks as regular Python functions") + # print(" • IDE support with autocomplete and type checking") + # print(" • Automatic conversion to API format") + # print(" • Backward compatible with string hooks") + # print(" • Same utility used everywhere") + # print("\n") + + +if __name__ == "__main__": + asyncio.run(main())