Compare commits
71 Commits
docker/bas
...
docker/fix
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
05ec0535cd | ||
|
|
83aeb565ee | ||
|
|
c0f1865287 | ||
|
|
46ef1116c4 | ||
|
|
4df83893ac | ||
|
|
13e116610d | ||
|
|
b74524fdfb | ||
|
|
bcac486921 | ||
|
|
6aef5a120f | ||
|
|
7cac008c10 | ||
|
|
7e8fb3a8f3 | ||
|
|
3efb59fb9a | ||
|
|
c7b7475b92 | ||
|
|
b71d624168 | ||
|
|
d670dcde0a | ||
|
|
f8606f6865 | ||
|
|
52da8d72bc | ||
|
|
8b7e67566e | ||
|
|
7388baa205 | ||
|
|
897bc3a493 | ||
|
|
8a37710313 | ||
|
|
97c92c4f62 | ||
|
|
f6a02c4358 | ||
|
|
6d1a398419 | ||
|
|
c107617920 | ||
|
|
69d0ef89dd | ||
|
|
1bf85bcb1a | ||
|
|
749232ba1a | ||
|
|
c7288dd2f1 | ||
|
|
fdbcddbf1a | ||
|
|
564d437d97 | ||
|
|
9cd06ea7eb | ||
|
|
c91b235cb7 | ||
|
|
eb257c2ba3 | ||
|
|
8d364a0731 | ||
|
|
6aff0e55aa | ||
|
|
38a0742708 | ||
|
|
a720a3a9fe | ||
|
|
017144c2dd | ||
|
|
32887ea40d | ||
|
|
eea41bf1ca | ||
|
|
21c302f439 | ||
|
|
8fc1747225 | ||
|
|
aadab30c3d | ||
|
|
4a04b8506a | ||
|
|
7dadb65b80 | ||
|
|
a3f057e19f | ||
|
|
216019f29a | ||
|
|
abe8a92561 | ||
|
|
5a4f21fad9 | ||
|
|
611d48f93b | ||
|
|
936397ee0e | ||
|
|
2c373f0642 | ||
|
|
d2c7f345ab | ||
|
|
8c62277718 | ||
|
|
5145d42df7 | ||
|
|
9900f63f97 | ||
|
|
9292b265fc | ||
|
|
80aa6c11d9 | ||
|
|
749d200866 | ||
|
|
408ad1b750 | ||
|
|
35dd206925 | ||
|
|
8d30662647 | ||
|
|
ef46df10da | ||
|
|
0d8d043109 | ||
|
|
70af81d9d7 | ||
|
|
361499d291 | ||
|
|
3fe49a766c | ||
|
|
fef715a891 | ||
|
|
69e8ca3d0d | ||
|
|
be63c98db3 |
81
.github/workflows/docker-release.yml
vendored
Normal file
81
.github/workflows/docker-release.yml
vendored
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
name: Docker Release

# Builds and publishes the multi-architecture Docker image either
# automatically (when a GitHub release is published) or manually
# (when a docker-rebuild-v* tag is pushed).
on:
  release:
    types: [published]
  push:
    tags:
      - 'docker-rebuild-v*'  # Allow manual Docker rebuilds via tags

jobs:
  docker:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Extract version from release or tag
        id: get_version
        # Event data is handed over through env vars instead of being
        # interpolated into the script text, so a crafted tag name cannot
        # inject shell commands.
        env:
          EVENT_NAME: ${{ github.event_name }}
          RELEASE_TAG: ${{ github.event.release.tag_name }}
        run: |
          if [ "$EVENT_NAME" = "release" ]; then
            # Triggered by release event
            VERSION="${RELEASE_TAG#v}"  # Remove 'v' prefix
          else
            # Triggered by docker-rebuild-v* tag
            VERSION="${GITHUB_REF#refs/tags/docker-rebuild-v}"
          fi
          # Fail early: an empty version would produce invalid image tags.
          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine a version from ${GITHUB_REF}"
            exit 1
          fi
          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
          echo "Building Docker images for version: $VERSION"

      - name: Extract major and minor versions
        id: versions
        env:
          VERSION: ${{ steps.get_version.outputs.VERSION }}
        run: |
          MAJOR=$(echo "$VERSION" | cut -d. -f1)    # 1.2.3 -> 1
          MINOR=$(echo "$VERSION" | cut -d. -f1-2)  # 1.2.3 -> 1.2
          echo "MAJOR=$MAJOR" >> "$GITHUB_OUTPUT"
          echo "MINOR=$MINOR" >> "$GITHUB_OUTPUT"
          echo "Semantic versions - Major: $MAJOR, Minor: $MINOR"

      # Registers QEMU emulators so the amd64 runner can execute the
      # linux/arm64 build steps requested in `platforms` below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Docker images
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          # Exact, minor, major and latest tags all reference the same build.
          tags: |
            unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}
            unclecode/crawl4ai:latest
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Summary
        env:
          VERSION: ${{ steps.get_version.outputs.VERSION }}
          MAJOR: ${{ steps.versions.outputs.MAJOR }}
          MINOR: ${{ steps.versions.outputs.MINOR }}
        run: |
          # Group the output so the summary file is opened once.
          {
            echo "## 🐳 Docker Release Complete!"
            echo ""
            echo "### Published Images"
            echo "- \`unclecode/crawl4ai:${VERSION}\`"
            echo "- \`unclecode/crawl4ai:${MINOR}\`"
            echo "- \`unclecode/crawl4ai:${MAJOR}\`"
            echo "- \`unclecode/crawl4ai:latest\`"
            echo ""
            echo "### Platforms"
            echo "- linux/amd64"
            echo "- linux/arm64"
            echo ""
            echo "### 🚀 Pull Command"
            echo "\`\`\`bash"
            echo "docker pull unclecode/crawl4ai:${VERSION}"
            echo "\`\`\`"
          } >> "$GITHUB_STEP_SUMMARY"
|
||||||
917
.github/workflows/docs/ARCHITECTURE.md
vendored
Normal file
917
.github/workflows/docs/ARCHITECTURE.md
vendored
Normal file
@@ -0,0 +1,917 @@
|
|||||||
|
# Workflow Architecture Documentation
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document describes the technical architecture of the split release pipeline for Crawl4AI.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture Diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Developer │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ git tag v1.2.3 │
|
||||||
|
│ git push --tags │
|
||||||
|
└──────────────────────────────┬──────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ GitHub Repository │
|
||||||
|
│ │
|
||||||
|
│ ┌────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ Tag Event: v1.2.3 │ │
|
||||||
|
│ └────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ release.yml (Release Pipeline) │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 1. Extract Version │ │ │
|
||||||
|
│ │ │ v1.2.3 → 1.2.3 │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 2. Validate Version │ │ │
|
||||||
|
│ │ │ Tag == __version__.py │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 3. Build Python Package │ │ │
|
||||||
|
│ │ │ - Source dist (.tar.gz) │ │ │
|
||||||
|
│ │ │ - Wheel (.whl) │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 4. Upload to PyPI │ │ │
|
||||||
|
│ │ │ - Authenticate with token │ │ │
|
||||||
|
│ │ │ - Upload dist/* │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 5. Create GitHub Release │ │ │
|
||||||
|
│ │ │ - Tag: v1.2.3 │ │ │
|
||||||
|
│ │ │ - Body: Install instructions │ │ │
|
||||||
|
│ │ │ - Status: Published │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ └────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ Release Event: published (v1.2.3) │ │
|
||||||
|
│ └────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ docker-release.yml (Docker Pipeline) │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 1. Extract Version from Release │ │ │
|
||||||
|
│ │ │ github.event.release.tag_name → 1.2.3 │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 2. Parse Semantic Versions │ │ │
|
||||||
|
│ │ │ 1.2.3 → Major: 1, Minor: 1.2 │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 3. Setup Multi-Arch Build │ │ │
|
||||||
|
│ │ │ - Docker Buildx │ │ │
|
||||||
|
│ │ │ - QEMU emulation │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 4. Authenticate Docker Hub │ │ │
|
||||||
|
│ │ │ - Username: DOCKER_USERNAME │ │ │
|
||||||
|
│ │ │ - Token: DOCKER_TOKEN │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 5. Build Multi-Arch Images │ │ │
|
||||||
|
│ │ │ ┌────────────────┬────────────────┐ │ │ │
|
||||||
|
│ │ │ │ linux/amd64 │ linux/arm64 │ │ │ │
|
||||||
|
│ │ │ └────────────────┴────────────────┘ │ │ │
|
||||||
|
│ │ │ Cache: GitHub Actions (type=gha) │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ │ ┌──────────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ 6. Push to Docker Hub │ │ │
|
||||||
|
│ │ │ Tags: │ │ │
|
||||||
|
│ │ │ - unclecode/crawl4ai:1.2.3 │ │ │
|
||||||
|
│ │ │ - unclecode/crawl4ai:1.2 │ │ │
|
||||||
|
│ │ │ - unclecode/crawl4ai:1 │ │ │
|
||||||
|
│ │ │ - unclecode/crawl4ai:latest │ │ │
|
||||||
|
│ │ └──────────────────────────────────────────────┘ │ │
|
||||||
|
│ └────────────────────────────────────────────────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ External Services │
|
||||||
|
│ │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||||
|
│ │ PyPI │ │ Docker Hub │ │ GitHub │ │
|
||||||
|
│ │ │ │ │ │ │ │
|
||||||
|
│ │ crawl4ai │ │ unclecode/ │ │ Releases │ │
|
||||||
|
│ │ 1.2.3 │ │ crawl4ai │ │ v1.2.3 │ │
|
||||||
|
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Component Details
|
||||||
|
|
||||||
|
### 1. Release Pipeline (release.yml)
|
||||||
|
|
||||||
|
#### Purpose
|
||||||
|
Fast publication of Python package and GitHub release.
|
||||||
|
|
||||||
|
#### Input
|
||||||
|
- **Trigger**: Git tag matching `v*` (excluding `test-v*`)
|
||||||
|
- **Example**: `v1.2.3`
|
||||||
|
|
||||||
|
#### Processing Stages
|
||||||
|
|
||||||
|
##### Stage 1: Version Extraction
|
||||||
|
```bash
|
||||||
|
Input: refs/tags/v1.2.3
|
||||||
|
Output: VERSION=1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```bash
|
||||||
|
TAG_VERSION=${GITHUB_REF#refs/tags/v} # Remove 'refs/tags/v' prefix
|
||||||
|
echo "VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 2: Version Validation
|
||||||
|
```bash
|
||||||
|
Input: TAG_VERSION=1.2.3
|
||||||
|
Check: crawl4ai/__version__.py contains __version__ = "1.2.3"
|
||||||
|
Output: Pass/Fail
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```bash
|
||||||
|
PACKAGE_VERSION=$(python -c "from crawl4ai.__version__ import __version__; print(__version__)")
|
||||||
|
if [ "$TAG_VERSION" != "$PACKAGE_VERSION" ]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 3: Package Build
|
||||||
|
```bash
|
||||||
|
Input: Source code + pyproject.toml
|
||||||
|
Output: dist/crawl4ai-1.2.3.tar.gz
|
||||||
|
dist/crawl4ai-1.2.3-py3-none-any.whl
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```bash
|
||||||
|
python -m build
|
||||||
|
# Uses build backend defined in pyproject.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 4: PyPI Upload
|
||||||
|
```bash
|
||||||
|
Input: dist/*.{tar.gz,whl}
|
||||||
|
Auth: PYPI_TOKEN
|
||||||
|
Output: Package published to PyPI
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```bash
|
||||||
|
twine upload dist/*
|
||||||
|
# Environment:
|
||||||
|
# TWINE_USERNAME: __token__
|
||||||
|
# TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 5: GitHub Release Creation
|
||||||
|
```bash
|
||||||
|
Input: Tag: v1.2.3
|
||||||
|
Body: Markdown content
|
||||||
|
Output: Published GitHub release
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```yaml
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
|
with:
|
||||||
|
tag_name: v1.2.3
|
||||||
|
name: Release v1.2.3
|
||||||
|
body: |
|
||||||
|
Installation instructions and changelog
|
||||||
|
draft: false
|
||||||
|
prerelease: false
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Output
|
||||||
|
- **PyPI Package**: https://pypi.org/project/crawl4ai/1.2.3/
|
||||||
|
- **GitHub Release**: Published release on repository
|
||||||
|
- **Event**: `release.published` (triggers Docker workflow)
|
||||||
|
|
||||||
|
#### Timeline
|
||||||
|
```
|
||||||
|
0:00 - Tag pushed
|
||||||
|
0:01 - Checkout + Python setup
|
||||||
|
0:02 - Version validation
|
||||||
|
0:03 - Package build
|
||||||
|
0:04 - PyPI upload starts
|
||||||
|
0:06 - PyPI upload complete
|
||||||
|
0:07 - GitHub release created
|
||||||
|
0:08 - Workflow complete
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Docker Release Pipeline (docker-release.yml)
|
||||||
|
|
||||||
|
#### Purpose
|
||||||
|
Build and publish multi-architecture Docker images.
|
||||||
|
|
||||||
|
#### Inputs
|
||||||
|
|
||||||
|
##### Input 1: Release Event (Automatic)
|
||||||
|
```yaml
|
||||||
|
Event: release.published
|
||||||
|
Data: github.event.release.tag_name = "v1.2.3"
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Input 2: Docker Rebuild Tag (Manual)
|
||||||
|
```yaml
|
||||||
|
Tag: docker-rebuild-v1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Processing Stages
|
||||||
|
|
||||||
|
##### Stage 1: Version Detection
|
||||||
|
```bash
|
||||||
|
# From release event:
|
||||||
|
VERSION="${TAG_NAME#v}"   # TAG_NAME = github.event.release.tag_name; removes only a leading "v"
|
||||||
|
# Result: "1.2.3"
|
||||||
|
|
||||||
|
# From rebuild tag:
|
||||||
|
VERSION="${GITHUB_REF#refs/tags/docker-rebuild-v}"   # strips the rebuild-tag prefix
|
||||||
|
# Result: "1.2.3"
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 2: Semantic Version Parsing
|
||||||
|
```bash
|
||||||
|
Input: VERSION=1.2.3
|
||||||
|
Output: MAJOR=1
|
||||||
|
MINOR=1.2
|
||||||
|
PATCH=3 (implicit)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```bash
|
||||||
|
MAJOR=$(echo $VERSION | cut -d. -f1) # Extract first component
|
||||||
|
MINOR=$(echo $VERSION | cut -d. -f1-2) # Extract first two components
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 3: Multi-Architecture Setup
|
||||||
|
```yaml
|
||||||
|
Setup:
|
||||||
|
- Docker Buildx (multi-platform builder)
|
||||||
|
- QEMU (ARM emulation on x86)
|
||||||
|
|
||||||
|
Platforms:
|
||||||
|
- linux/amd64 (x86_64)
|
||||||
|
- linux/arm64 (aarch64)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Architecture**:
|
||||||
|
```
|
||||||
|
GitHub Runner (linux/amd64)
|
||||||
|
├─ Buildx Builder
|
||||||
|
│ ├─ Native: Build linux/amd64 image
|
||||||
|
│ └─ QEMU: Emulate ARM to build linux/arm64 image
|
||||||
|
└─ Generate manifest list (points to both images)
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 4: Docker Hub Authentication
|
||||||
|
```bash
|
||||||
|
Input: DOCKER_USERNAME
|
||||||
|
DOCKER_TOKEN
|
||||||
|
Output: Authenticated Docker client
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 5: Build with Cache
|
||||||
|
```yaml
|
||||||
|
Cache Configuration:
|
||||||
|
cache-from: type=gha # Read from GitHub Actions cache
|
||||||
|
cache-to: type=gha,mode=max # Write all layers
|
||||||
|
|
||||||
|
Cache Key Components:
|
||||||
|
- Workflow file path
|
||||||
|
- Branch name
|
||||||
|
- Architecture (amd64/arm64)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Cache Hierarchy**:
|
||||||
|
```
|
||||||
|
Cache Entry: main/docker-release.yml/linux-amd64
|
||||||
|
├─ Layer: sha256:abc123... (FROM python:3.12)
|
||||||
|
├─ Layer: sha256:def456... (RUN apt-get update)
|
||||||
|
├─ Layer: sha256:ghi789... (COPY requirements.txt)
|
||||||
|
├─ Layer: sha256:jkl012... (RUN pip install)
|
||||||
|
└─ Layer: sha256:mno345... (COPY . /app)
|
||||||
|
|
||||||
|
Cache Hit/Miss Logic:
|
||||||
|
- If layer input unchanged → cache hit → skip build
|
||||||
|
- If layer input changed → cache miss → rebuild + all subsequent layers
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Stage 6: Tag Generation
|
||||||
|
```bash
|
||||||
|
Input: VERSION=1.2.3, MAJOR=1, MINOR=1.2
|
||||||
|
|
||||||
|
Output Tags:
|
||||||
|
- unclecode/crawl4ai:1.2.3 (exact version)
|
||||||
|
- unclecode/crawl4ai:1.2 (minor version)
|
||||||
|
- unclecode/crawl4ai:1 (major version)
|
||||||
|
- unclecode/crawl4ai:latest (latest stable)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Tag Strategy**:
|
||||||
|
- All tags point to same image SHA
|
||||||
|
- Users can pin to desired stability level
|
||||||
|
- Pushing new version updates `1`, `1.2`, and `latest` automatically
|
||||||
|
|
||||||
|
##### Stage 7: Push to Registry
|
||||||
|
```bash
|
||||||
|
For each tag:
|
||||||
|
For each platform (amd64, arm64):
|
||||||
|
Push image to Docker Hub
|
||||||
|
|
||||||
|
Create manifest list:
|
||||||
|
Manifest: unclecode/crawl4ai:1.2.3
|
||||||
|
├─ linux/amd64: sha256:abc...
|
||||||
|
└─ linux/arm64: sha256:def...
|
||||||
|
|
||||||
|
Docker CLI automatically selects correct platform on pull
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Output
|
||||||
|
- **Docker Images**: 2 platform images (linux/amd64, linux/arm64) behind a single multi-arch manifest list, published under 4 tags
|
||||||
|
- **Docker Hub**: https://hub.docker.com/r/unclecode/crawl4ai/tags
|
||||||
|
|
||||||
|
#### Timeline
|
||||||
|
|
||||||
|
**Cold Cache (First Build)**:
|
||||||
|
```
|
||||||
|
0:00 - Release event received
|
||||||
|
0:01 - Checkout + Buildx setup
|
||||||
|
0:02 - Docker Hub auth
|
||||||
|
0:03 - Start build (amd64)
|
||||||
|
0:08 - Complete amd64 build
|
||||||
|
0:09 - Start build (arm64)
|
||||||
|
0:14 - Complete arm64 build
|
||||||
|
0:15 - Generate manifests
|
||||||
|
0:16 - Push all tags
|
||||||
|
0:17 - Workflow complete
|
||||||
|
```
|
||||||
|
|
||||||
|
**Warm Cache (Code Change Only)**:
|
||||||
|
```
|
||||||
|
0:00 - Release event received
|
||||||
|
0:01 - Checkout + Buildx setup
|
||||||
|
0:02 - Docker Hub auth
|
||||||
|
0:03 - Start build (amd64) - cache hit for layers 1-4
|
||||||
|
0:04 - Complete amd64 build (only layer 5 rebuilt)
|
||||||
|
0:05 - Start build (arm64) - cache hit for layers 1-4
|
||||||
|
0:06 - Complete arm64 build (only layer 5 rebuilt)
|
||||||
|
0:07 - Generate manifests
|
||||||
|
0:08 - Push all tags
|
||||||
|
0:09 - Workflow complete
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Flow
|
||||||
|
|
||||||
|
### Version Information Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Developer
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
crawl4ai/__version__.py
|
||||||
|
__version__ = "1.2.3"
|
||||||
|
│
|
||||||
|
├─► Git Tag
|
||||||
|
│ v1.2.3
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ release.yml
|
||||||
|
│ │
|
||||||
|
│ ├─► Validation
|
||||||
|
│ │ ✓ Match
|
||||||
|
│ │
|
||||||
|
│ ├─► PyPI Package
|
||||||
|
│ │ crawl4ai==1.2.3
|
||||||
|
│ │
|
||||||
|
│ └─► GitHub Release
|
||||||
|
│ v1.2.3
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ docker-release.yml
|
||||||
|
│ │
|
||||||
|
│ └─► Docker Tags
|
||||||
|
│ 1.2.3, 1.2, 1, latest
|
||||||
|
│
|
||||||
|
└─► Package Metadata
|
||||||
|
pyproject.toml
|
||||||
|
version = "1.2.3"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Secrets Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
GitHub Secrets (Encrypted at Rest)
|
||||||
|
│
|
||||||
|
├─► PYPI_TOKEN
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ release.yml
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ TWINE_PASSWORD env var (masked in logs)
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ PyPI API (HTTPS)
|
||||||
|
│
|
||||||
|
├─► DOCKER_USERNAME
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ docker-release.yml
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ docker/login-action (masked in logs)
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ Docker Hub API (HTTPS)
|
||||||
|
│
|
||||||
|
└─► DOCKER_TOKEN
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
docker-release.yml
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
docker/login-action (masked in logs)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Docker Hub API (HTTPS)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Artifact Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Source Code
|
||||||
|
│
|
||||||
|
├─► release.yml
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ python -m build
|
||||||
|
│ │
|
||||||
|
│ ├─► crawl4ai-1.2.3.tar.gz
|
||||||
|
│ │ │
|
||||||
|
│ │ ▼
|
||||||
|
│ │ PyPI Storage
|
||||||
|
│ │ │
|
||||||
|
│ │ ▼
|
||||||
|
│ │ pip install crawl4ai
|
||||||
|
│ │
|
||||||
|
│ └─► crawl4ai-1.2.3-py3-none-any.whl
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ PyPI Storage
|
||||||
|
│ │
|
||||||
|
│ ▼
|
||||||
|
│ pip install crawl4ai
|
||||||
|
│
|
||||||
|
└─► docker-release.yml
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
docker build
|
||||||
|
│
|
||||||
|
├─► Image: linux/amd64
|
||||||
|
│ │
|
||||||
|
│ └─► Docker Hub
|
||||||
|
│ unclecode/crawl4ai:1.2.3 (linux/amd64 entry of the manifest list)
|
||||||
|
│
|
||||||
|
└─► Image: linux/arm64
|
||||||
|
│
|
||||||
|
└─► Docker Hub
|
||||||
|
unclecode/crawl4ai:1.2.3 (linux/arm64 entry of the manifest list)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## State Machines
|
||||||
|
|
||||||
|
### Release Pipeline State Machine
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────┐
|
||||||
|
│ START │
|
||||||
|
└────┬────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Extract │
|
||||||
|
│ Version │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐ ┌─────────┐
|
||||||
|
│ Validate │─────►│ FAILED │
|
||||||
|
│ Version │ No │ (Exit 1)│
|
||||||
|
└──────┬───────┘ └─────────┘
|
||||||
|
│ Yes
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Build │
|
||||||
|
│ Package │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐ ┌─────────┐
|
||||||
|
│ Upload │─────►│ FAILED │
|
||||||
|
│ to PyPI │ Error│ (Exit 1)│
|
||||||
|
└──────┬───────┘ └─────────┘
|
||||||
|
│ Success
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Create │
|
||||||
|
│ GH Release │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ SUCCESS │
|
||||||
|
│ (Emit Event) │
|
||||||
|
└──────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Pipeline State Machine
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────┐
|
||||||
|
│ START │
|
||||||
|
│ (Event) │
|
||||||
|
└────┬────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Detect │
|
||||||
|
│ Version │
|
||||||
|
│ Source │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Parse │
|
||||||
|
│ Semantic │
|
||||||
|
│ Versions │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐ ┌─────────┐
|
||||||
|
│ Authenticate │─────►│ FAILED │
|
||||||
|
│ Docker Hub │ Error│ (Exit 1)│
|
||||||
|
└──────┬───────┘ └─────────┘
|
||||||
|
│ Success
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Build │
|
||||||
|
│ amd64 │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐ ┌─────────┐
|
||||||
|
│ Build │─────►│ FAILED │
|
||||||
|
│ arm64 │ Error│ (Exit 1)│
|
||||||
|
└──────┬───────┘ └─────────┘
|
||||||
|
│ Success
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ Push All │
|
||||||
|
│ Tags │
|
||||||
|
└──────┬───────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐
|
||||||
|
│ SUCCESS │
|
||||||
|
└──────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Security Architecture
|
||||||
|
|
||||||
|
### Threat Model
|
||||||
|
|
||||||
|
#### Threats Mitigated
|
||||||
|
|
||||||
|
1. **Secret Exposure**
|
||||||
|
- Mitigation: GitHub Actions secret masking
|
||||||
|
- Evidence: Secrets never appear in logs
|
||||||
|
|
||||||
|
2. **Unauthorized Package Upload**
|
||||||
|
- Mitigation: Scoped PyPI tokens
|
||||||
|
- Evidence: Token limited to `crawl4ai` project
|
||||||
|
|
||||||
|
3. **Man-in-the-Middle**
|
||||||
|
- Mitigation: HTTPS for all API calls
|
||||||
|
- Evidence: PyPI, Docker Hub, GitHub all use TLS
|
||||||
|
|
||||||
|
4. **Supply Chain Tampering**
|
||||||
|
- Mitigation: Immutable artifacts, content checksums
|
||||||
|
- Evidence: PyPI stores SHA256, Docker uses content-addressable storage
|
||||||
|
|
||||||
|
#### Trust Boundaries
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ Trusted Zone │
|
||||||
|
│ ┌────────────────────────────────┐ │
|
||||||
|
│ │ GitHub Actions Runner │ │
|
||||||
|
│ │ - Ephemeral VM │ │
|
||||||
|
│ │ - Isolated environment │ │
|
||||||
|
│ │ - Access to secrets │ │
|
||||||
|
│ └────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ │ HTTPS (TLS 1.2+) │
|
||||||
|
│ ▼ │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌────────────┼────────────┐
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌────────┐ ┌─────────┐ ┌──────────┐
|
||||||
|
│ PyPI │ │ Docker │ │ GitHub │
|
||||||
|
│ API │ │ Hub │ │ API │
|
||||||
|
└────────┘ └─────────┘ └──────────┘
|
||||||
|
External External External
|
||||||
|
Service Service Service
|
||||||
|
```
|
||||||
|
|
||||||
|
### Secret Management
|
||||||
|
|
||||||
|
#### Secret Lifecycle
|
||||||
|
|
||||||
|
```
|
||||||
|
Creation (Developer)
|
||||||
|
│
|
||||||
|
├─► PyPI: Create API token (scoped to project)
|
||||||
|
├─► Docker Hub: Create access token (read/write)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Storage (GitHub)
|
||||||
|
│
|
||||||
|
├─► Encrypted at rest (AES-256)
|
||||||
|
├─► Access controlled (repo-scoped)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Usage (Workflow)
|
||||||
|
│
|
||||||
|
├─► Injected as env vars
|
||||||
|
├─► Masked in logs (GitHub redacts on output)
|
||||||
|
├─► Never persisted to disk (in-memory only)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Transmission (API Call)
|
||||||
|
│
|
||||||
|
├─► HTTPS only
|
||||||
|
├─► TLS 1.2+ with strong ciphers
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Rotation (Manual)
|
||||||
|
│
|
||||||
|
└─► Regenerate on PyPI/Docker Hub
|
||||||
|
Update GitHub secret
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Performance Characteristics
|
||||||
|
|
||||||
|
### Release Pipeline Performance
|
||||||
|
|
||||||
|
| Metric | Value | Notes |
|
||||||
|
|--------|-------|-------|
|
||||||
|
| Cold start | ~2-3 min | First run on new runner |
|
||||||
|
| Warm start | ~2-3 min | Minimal caching benefit |
|
||||||
|
| PyPI upload | ~30-60 sec | Network-bound |
|
||||||
|
| Package build | ~30 sec | CPU-bound |
|
||||||
|
| Parallelization | None | Sequential by design |
|
||||||
|
|
||||||
|
### Docker Pipeline Performance
|
||||||
|
|
||||||
|
| Metric | Cold Cache | Warm Cache (code) | Warm Cache (deps) |
|
||||||
|
|--------|-----------|-------------------|-------------------|
|
||||||
|
| Total time | 10-15 min | 1-2 min | 3-5 min |
|
||||||
|
| amd64 build | 5-7 min | 30-60 sec | 1-2 min |
|
||||||
|
| arm64 build | 5-7 min | 30-60 sec | 1-2 min |
|
||||||
|
| Push time | 1-2 min | 30 sec | 30 sec |
|
||||||
|
| Cache hit rate | 0% | 85% | 60% |
|
||||||
|
|
||||||
|
### Cache Performance Model
|
||||||
|
|
||||||
|
```python
|
||||||
|
def estimate_build_time(changes):
    """Estimate the Docker pipeline duration for a set of changed files.

    Rough model of layer-cache behaviour: the earlier in the image build a
    changed input is consumed, the more layers have to be rebuilt.

    Args:
        changes: Iterable of changed file paths. Any iterable is accepted,
            including generators; a single path string is treated as a
            one-item collection.

    Returns:
        Estimated duration in seconds.
    """
    base_time = 60  # seconds (setup + push)

    # Materialise once: the checks below scan the input several times, and a
    # generator would be exhausted by the first membership test.
    if isinstance(changes, str):
        changed = (changes,)
    else:
        changed = tuple(changes)

    if "Dockerfile" in changed:
        return base_time + (10 * 60)  # Full rebuild: ~11 min
    if "requirements.txt" in changed:
        return base_time + (3 * 60)  # Deps rebuild: ~4 min
    if any(f.endswith(".py") for f in changed):
        return base_time + 60  # Code only: ~2 min
    return base_time  # No changes: ~1 min
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Scalability Considerations
|
||||||
|
|
||||||
|
### Current Limits
|
||||||
|
|
||||||
|
| Resource | Limit | Impact |
|
||||||
|
|----------|-------|--------|
|
||||||
|
| Workflow concurrency | 20 (default) | Max 20 releases in parallel |
|
||||||
|
| Artifact storage | 500 MB/artifact | PyPI packages small (<10 MB) |
|
||||||
|
| Cache storage | 10 GB/repo | Docker layers fit comfortably |
|
||||||
|
| Workflow run time | 6 hours | Plenty of headroom |
|
||||||
|
|
||||||
|
### Scaling Strategies
|
||||||
|
|
||||||
|
#### Horizontal Scaling (Multiple Repos)
|
||||||
|
```
|
||||||
|
crawl4ai (main)
|
||||||
|
├─ release.yml
|
||||||
|
└─ docker-release.yml
|
||||||
|
|
||||||
|
crawl4ai-plugins (separate)
|
||||||
|
├─ release.yml
|
||||||
|
└─ docker-release.yml
|
||||||
|
|
||||||
|
Each repo has independent:
|
||||||
|
- Secrets
|
||||||
|
- Cache (10 GB each)
|
||||||
|
- Concurrency limits (20 each)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Vertical Scaling (Larger Runners)
|
||||||
|
```yaml
|
||||||
|
jobs:
|
||||||
|
docker:
|
||||||
|
runs-on: ubuntu-latest-8-cores # GitHub-hosted larger runner
|
||||||
|
# 4x faster builds for CPU-bound layers
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Disaster Recovery
|
||||||
|
|
||||||
|
### Failure Scenarios
|
||||||
|
|
||||||
|
#### Scenario 1: Release Pipeline Fails
|
||||||
|
|
||||||
|
**Failure Point**: PyPI upload fails (network error)
|
||||||
|
|
||||||
|
**State**:
|
||||||
|
- ✓ Version validated
|
||||||
|
- ✓ Package built
|
||||||
|
- ✗ PyPI upload
|
||||||
|
- ✗ GitHub release
|
||||||
|
|
||||||
|
**Recovery**:
|
||||||
|
```bash
|
||||||
|
# Manual upload
|
||||||
|
twine upload dist/*
|
||||||
|
|
||||||
|
# Retry workflow (re-run from GitHub Actions UI)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Prevention**: Add retry logic to PyPI upload
|
||||||
|
|
||||||
|
#### Scenario 2: Docker Pipeline Fails
|
||||||
|
|
||||||
|
**Failure Point**: ARM build fails (dependency issue)
|
||||||
|
|
||||||
|
**State**:
|
||||||
|
- ✓ PyPI published
|
||||||
|
- ✓ GitHub release created
|
||||||
|
- ✓ amd64 image built
|
||||||
|
- ✗ arm64 image build
|
||||||
|
|
||||||
|
**Recovery**:
|
||||||
|
```bash
|
||||||
|
# Fix Dockerfile
|
||||||
|
git commit -am "fix: ARM build dependency"
|
||||||
|
|
||||||
|
# Trigger rebuild
|
||||||
|
git tag docker-rebuild-v1.2.3
|
||||||
|
git push origin docker-rebuild-v1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
**Impact**: PyPI package available, only Docker ARM users affected
|
||||||
|
|
||||||
|
#### Scenario 3: Partial Release
|
||||||
|
|
||||||
|
**Failure Point**: GitHub release creation fails
|
||||||
|
|
||||||
|
**State**:
|
||||||
|
- ✓ PyPI published
|
||||||
|
- ✗ GitHub release
|
||||||
|
- ✗ Docker images
|
||||||
|
|
||||||
|
**Recovery**:
|
||||||
|
```bash
|
||||||
|
# Create release manually
|
||||||
|
gh release create v1.2.3 \
|
||||||
|
--title "Release v1.2.3" \
|
||||||
|
--notes "..."
|
||||||
|
|
||||||
|
# This triggers docker-release.yml automatically
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Monitoring and Observability
|
||||||
|
|
||||||
|
### Metrics to Track
|
||||||
|
|
||||||
|
#### Release Pipeline
|
||||||
|
- Success rate (target: >99%)
|
||||||
|
- Duration (target: <3 min)
|
||||||
|
- PyPI upload time (target: <60 sec)
|
||||||
|
|
||||||
|
#### Docker Pipeline
|
||||||
|
- Success rate (target: >95%)
|
||||||
|
- Duration (target: <15 min cold, <2 min warm)
|
||||||
|
- Cache hit rate (target: >80% for code changes)
|
||||||
|
|
||||||
|
### Alerting
|
||||||
|
|
||||||
|
**Critical Alerts**:
|
||||||
|
- Release pipeline failure (blocks release)
|
||||||
|
- PyPI authentication failure (expired token)
|
||||||
|
|
||||||
|
**Warning Alerts**:
|
||||||
|
- Docker build >15 min (performance degradation)
|
||||||
|
- Cache hit rate <50% (cache issue)
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
|
||||||
|
**GitHub Actions Logs**:
|
||||||
|
- Retention: 90 days
|
||||||
|
- Downloadable: Yes
|
||||||
|
- Searchable: Limited
|
||||||
|
|
||||||
|
**Recommended External Logging**:
|
||||||
|
```yaml
|
||||||
|
- name: Send logs to external service
|
||||||
|
if: failure()
|
||||||
|
run: |
|
||||||
|
curl -X POST https://logs.example.com/api/v1/logs \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "{\"workflow\": \"${{ github.workflow }}\", \"status\": \"failed\"}"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
### Planned Improvements
|
||||||
|
|
||||||
|
1. **Automated Changelog Generation**
|
||||||
|
- Use conventional commits
|
||||||
|
- Generate CHANGELOG.md automatically
|
||||||
|
|
||||||
|
2. **Pre-release Testing**
|
||||||
|
- Test builds on `test-v*` tags
|
||||||
|
- Upload to TestPyPI
|
||||||
|
|
||||||
|
3. **Notification System**
|
||||||
|
- Slack/Discord notifications on release
|
||||||
|
- Email on failure
|
||||||
|
|
||||||
|
4. **Performance Optimization**
|
||||||
|
- Parallel Docker builds (amd64 + arm64 simultaneously)
|
||||||
|
- Persistent runners for better caching
|
||||||
|
|
||||||
|
5. **Enhanced Validation**
|
||||||
|
- Smoke tests after PyPI upload
|
||||||
|
- Container security scanning
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [GitHub Actions Architecture](https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions)
|
||||||
|
- [Docker Build Cache](https://docs.docker.com/build/cache/)
|
||||||
|
- [PyPI API Documentation](https://warehouse.pypa.io/api-reference/)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Last Updated**: 2025-01-21
|
||||||
|
**Version**: 2.0
|
||||||
1029
.github/workflows/docs/README.md
vendored
Normal file
1029
.github/workflows/docs/README.md
vendored
Normal file
File diff suppressed because it is too large
Load Diff
287
.github/workflows/docs/WORKFLOW_REFERENCE.md
vendored
Normal file
287
.github/workflows/docs/WORKFLOW_REFERENCE.md
vendored
Normal file
@@ -0,0 +1,287 @@
|
|||||||
|
# Workflow Quick Reference
|
||||||
|
|
||||||
|
## Quick Commands
|
||||||
|
|
||||||
|
### Standard Release
|
||||||
|
```bash
|
||||||
|
# 1. Update version
|
||||||
|
vim crawl4ai/__version__.py # Set to "1.2.3"
|
||||||
|
|
||||||
|
# 2. Commit and tag
|
||||||
|
git add crawl4ai/__version__.py
|
||||||
|
git commit -m "chore: bump version to 1.2.3"
|
||||||
|
git tag v1.2.3
|
||||||
|
git push origin main
|
||||||
|
git push origin v1.2.3
|
||||||
|
|
||||||
|
# 3. Monitor
|
||||||
|
# - PyPI: ~2-3 minutes
|
||||||
|
# - Docker: ~1-15 minutes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Rebuild Only
|
||||||
|
```bash
|
||||||
|
git tag docker-rebuild-v1.2.3
|
||||||
|
git push origin docker-rebuild-v1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Delete Tag (Undo Release)
|
||||||
|
```bash
|
||||||
|
# Local
|
||||||
|
git tag -d v1.2.3
|
||||||
|
|
||||||
|
# Remote
|
||||||
|
git push --delete origin v1.2.3
|
||||||
|
|
||||||
|
# GitHub Release
|
||||||
|
gh release delete v1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Workflow Triggers
|
||||||
|
|
||||||
|
### release.yml
|
||||||
|
| Event | Pattern | Example |
|
||||||
|
|-------|---------|---------|
|
||||||
|
| Tag push | `v*` | `v1.2.3` |
|
||||||
|
| Excludes | `test-v*` | `test-v1.2.3` |
|
||||||
|
|
||||||
|
### docker-release.yml
|
||||||
|
| Event | Pattern | Example |
|
||||||
|
|-------|---------|---------|
|
||||||
|
| Release published | `release.published` | Automatic |
|
||||||
|
| Tag push | `docker-rebuild-v*` | `docker-rebuild-v1.2.3` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
### release.yml
|
||||||
|
| Variable | Source | Example |
|
||||||
|
|----------|--------|---------|
|
||||||
|
| `VERSION` | Git tag | `1.2.3` |
|
||||||
|
| `TWINE_USERNAME` | Static | `__token__` |
|
||||||
|
| `TWINE_PASSWORD` | Secret | `pypi-Ag...` |
|
||||||
|
| `GITHUB_TOKEN` | Auto | `ghs_...` |
|
||||||
|
|
||||||
|
### docker-release.yml
|
||||||
|
| Variable | Source | Example |
|
||||||
|
|----------|--------|---------|
|
||||||
|
| `VERSION` | Release/Tag | `1.2.3` |
|
||||||
|
| `MAJOR` | Computed | `1` |
|
||||||
|
| `MINOR` | Computed | `1.2` |
|
||||||
|
| `DOCKER_USERNAME` | Secret | `unclecode` |
|
||||||
|
| `DOCKER_TOKEN` | Secret | `dckr_pat_...` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Docker Tags Generated
|
||||||
|
|
||||||
|
| Version | Tags Created |
|
||||||
|
|---------|-------------|
|
||||||
|
| v1.0.0 | `1.0.0`, `1.0`, `1`, `latest` |
|
||||||
|
| v1.1.0 | `1.1.0`, `1.1`, `1`, `latest` |
|
||||||
|
| v1.2.3 | `1.2.3`, `1.2`, `1`, `latest` |
|
||||||
|
| v2.0.0 | `2.0.0`, `2.0`, `2`, `latest` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Workflow Outputs
|
||||||
|
|
||||||
|
### release.yml
|
||||||
|
| Output | Location | Time |
|
||||||
|
|--------|----------|------|
|
||||||
|
| PyPI Package | https://pypi.org/project/crawl4ai/ | ~2-3 min |
|
||||||
|
| GitHub Release | Repository → Releases | ~2-3 min |
|
||||||
|
| Workflow Summary | Actions → Run → Summary | Immediate |
|
||||||
|
|
||||||
|
### docker-release.yml
|
||||||
|
| Output | Location | Time |
|
||||||
|
|--------|----------|------|
|
||||||
|
| Docker Images | https://hub.docker.com/r/unclecode/crawl4ai | ~1-15 min |
|
||||||
|
| Workflow Summary | Actions → Run → Summary | Immediate |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
| Issue | Solution |
|
||||||
|
|-------|----------|
|
||||||
|
| Version mismatch | Update `crawl4ai/__version__.py` to match tag |
|
||||||
|
| PyPI 403 Forbidden | Check `PYPI_TOKEN` secret |
|
||||||
|
| PyPI 400 File exists | Version already published, increment version |
|
||||||
|
| Docker auth failed | Regenerate `DOCKER_TOKEN` |
|
||||||
|
| Docker build timeout | Check Dockerfile, review build logs |
|
||||||
|
| Cache not working | First build on branch always cold |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Secrets Checklist
|
||||||
|
|
||||||
|
- [ ] `PYPI_TOKEN` - PyPI API token (project or account scope)
|
||||||
|
- [ ] `DOCKER_USERNAME` - Docker Hub username
|
||||||
|
- [ ] `DOCKER_TOKEN` - Docker Hub access token (read/write)
|
||||||
|
- [ ] `GITHUB_TOKEN` - Auto-provided (no action needed)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Workflow Dependencies
|
||||||
|
|
||||||
|
### release.yml Dependencies
|
||||||
|
```yaml
|
||||||
|
Python: 3.12
|
||||||
|
Actions:
|
||||||
|
- actions/checkout@v4
|
||||||
|
- actions/setup-python@v5
|
||||||
|
- softprops/action-gh-release@v2
|
||||||
|
PyPI Packages:
|
||||||
|
- build
|
||||||
|
- twine
|
||||||
|
```
|
||||||
|
|
||||||
|
### docker-release.yml Dependencies
|
||||||
|
```yaml
|
||||||
|
Actions:
|
||||||
|
- actions/checkout@v4
|
||||||
|
- docker/setup-buildx-action@v3
|
||||||
|
- docker/login-action@v3
|
||||||
|
- docker/build-push-action@v5
|
||||||
|
Docker:
|
||||||
|
- Buildx
|
||||||
|
- QEMU (for multi-arch)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Cache Information
|
||||||
|
|
||||||
|
### Type
|
||||||
|
- GitHub Actions Cache (`type=gha`)
|
||||||
|
|
||||||
|
### Storage
|
||||||
|
- **Limit**: 10GB per repository
|
||||||
|
- **Retention**: 7 days for unused entries
|
||||||
|
- **Cleanup**: Automatic LRU eviction
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
| Scenario | Cache Hit | Build Time |
|
||||||
|
|----------|-----------|------------|
|
||||||
|
| First build | 0% | 10-15 min |
|
||||||
|
| Code change only | 85% | 1-2 min |
|
||||||
|
| Dependency update | 60% | 3-5 min |
|
||||||
|
| No changes | 100% | 30-60 sec |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build Platforms
|
||||||
|
|
||||||
|
| Platform | Architecture | Devices |
|
||||||
|
|----------|--------------|---------|
|
||||||
|
| linux/amd64 | x86_64 | Intel/AMD servers, AWS EC2, GCP |
|
||||||
|
| linux/arm64 | aarch64 | Apple Silicon, AWS Graviton, Raspberry Pi |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Version Validation
|
||||||
|
|
||||||
|
### Pre-Tag Checklist
|
||||||
|
```bash
|
||||||
|
# Check current version
|
||||||
|
python -c "from crawl4ai.__version__ import __version__; print(__version__)"
|
||||||
|
|
||||||
|
# Verify it matches intended tag
|
||||||
|
# If tag is v1.2.3, version should be "1.2.3"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Post-Release Verification
|
||||||
|
```bash
|
||||||
|
# PyPI
|
||||||
|
pip install crawl4ai==1.2.3
|
||||||
|
python -c "import crawl4ai; print(crawl4ai.__version__)"
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
docker pull unclecode/crawl4ai:1.2.3
|
||||||
|
docker run unclecode/crawl4ai:1.2.3 python -c "import crawl4ai; print(crawl4ai.__version__)"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Monitoring URLs
|
||||||
|
|
||||||
|
| Service | URL |
|
||||||
|
|---------|-----|
|
||||||
|
| GitHub Actions | `https://github.com/{owner}/{repo}/actions` |
|
||||||
|
| PyPI Project | `https://pypi.org/project/crawl4ai/` |
|
||||||
|
| Docker Hub | `https://hub.docker.com/r/unclecode/crawl4ai` |
|
||||||
|
| GitHub Releases | `https://github.com/{owner}/{repo}/releases` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Rollback Strategy
|
||||||
|
|
||||||
|
### PyPI (Cannot Delete)
|
||||||
|
```bash
|
||||||
|
# Bump crawl4ai/__version__.py to the next patch version (1.2.4) and commit, then tag it
|
||||||
|
git tag v1.2.4
|
||||||
|
git push origin v1.2.4
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker (Can Overwrite)
|
||||||
|
```bash
|
||||||
|
# Rebuild with fix
|
||||||
|
git tag docker-rebuild-v1.2.3
|
||||||
|
git push origin docker-rebuild-v1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
### GitHub Release
|
||||||
|
```bash
|
||||||
|
# Delete release
|
||||||
|
gh release delete v1.2.3
|
||||||
|
|
||||||
|
# Delete tag
|
||||||
|
git push --delete origin v1.2.3
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Status Badge Markdown
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
[![Release Pipeline](https://github.com/{owner}/{repo}/actions/workflows/release.yml/badge.svg)](https://github.com/{owner}/{repo}/actions/workflows/release.yml)
|
||||||
|
|
||||||
|
[![Docker Release](https://github.com/{owner}/{repo}/actions/workflows/docker-release.yml/badge.svg)](https://github.com/{owner}/{repo}/actions/workflows/docker-release.yml)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Timeline Example
|
||||||
|
|
||||||
|
```
|
||||||
|
0:00 - Push tag v1.2.3
|
||||||
|
0:01 - release.yml starts
|
||||||
|
0:02 - Version validation passes
|
||||||
|
0:03 - Package built
|
||||||
|
0:04 - PyPI upload starts
|
||||||
|
0:06 - PyPI upload complete ✓
|
||||||
|
0:07 - GitHub release created ✓
|
||||||
|
0:08 - release.yml complete
|
||||||
|
0:08 - docker-release.yml triggered
|
||||||
|
0:10 - Docker build starts
|
||||||
|
0:12 - amd64 image built (cache hit)
|
||||||
|
0:14 - arm64 image built (cache hit)
|
||||||
|
0:15 - Images pushed to Docker Hub ✓
|
||||||
|
0:16 - docker-release.yml complete
|
||||||
|
|
||||||
|
Total: ~16 minutes
|
||||||
|
Critical path (PyPI + GitHub): ~8 minutes
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Contact
|
||||||
|
|
||||||
|
For workflow issues:
|
||||||
|
1. Check Actions tab for logs
|
||||||
|
2. Review this reference
|
||||||
|
3. See [README.md](./README.md) for detailed docs
|
||||||
79
.github/workflows/release.yml
vendored
79
.github/workflows/release.yml
vendored
@@ -10,53 +10,53 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
contents: write # Required for creating releases
|
contents: write # Required for creating releases
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.12'
|
python-version: '3.12'
|
||||||
|
|
||||||
- name: Extract version from tag
|
- name: Extract version from tag
|
||||||
id: get_version
|
id: get_version
|
||||||
run: |
|
run: |
|
||||||
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||||
echo "VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT
|
echo "VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||||
echo "Releasing version: $TAG_VERSION"
|
echo "Releasing version: $TAG_VERSION"
|
||||||
|
|
||||||
- name: Install package dependencies
|
- name: Install package dependencies
|
||||||
run: |
|
run: |
|
||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
- name: Check version consistency
|
- name: Check version consistency
|
||||||
run: |
|
run: |
|
||||||
TAG_VERSION=${{ steps.get_version.outputs.VERSION }}
|
TAG_VERSION=${{ steps.get_version.outputs.VERSION }}
|
||||||
PACKAGE_VERSION=$(python -c "from crawl4ai.__version__ import __version__; print(__version__)")
|
PACKAGE_VERSION=$(python -c "from crawl4ai.__version__ import __version__; print(__version__)")
|
||||||
|
|
||||||
echo "Tag version: $TAG_VERSION"
|
echo "Tag version: $TAG_VERSION"
|
||||||
echo "Package version: $PACKAGE_VERSION"
|
echo "Package version: $PACKAGE_VERSION"
|
||||||
|
|
||||||
if [ "$TAG_VERSION" != "$PACKAGE_VERSION" ]; then
|
if [ "$TAG_VERSION" != "$PACKAGE_VERSION" ]; then
|
||||||
echo "❌ Version mismatch! Tag: $TAG_VERSION, Package: $PACKAGE_VERSION"
|
echo "❌ Version mismatch! Tag: $TAG_VERSION, Package: $PACKAGE_VERSION"
|
||||||
echo "Please update crawl4ai/__version__.py to match the tag version"
|
echo "Please update crawl4ai/__version__.py to match the tag version"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "✅ Version check passed: $TAG_VERSION"
|
echo "✅ Version check passed: $TAG_VERSION"
|
||||||
|
|
||||||
- name: Install build dependencies
|
- name: Install build dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install build twine
|
pip install build twine
|
||||||
|
|
||||||
- name: Build package
|
- name: Build package
|
||||||
run: python -m build
|
run: python -m build
|
||||||
|
|
||||||
- name: Check package
|
- name: Check package
|
||||||
run: twine check dist/*
|
run: twine check dist/*
|
||||||
|
|
||||||
- name: Upload to PyPI
|
- name: Upload to PyPI
|
||||||
env:
|
env:
|
||||||
TWINE_USERNAME: __token__
|
TWINE_USERNAME: __token__
|
||||||
@@ -65,37 +65,7 @@ jobs:
|
|||||||
echo "📦 Uploading to PyPI..."
|
echo "📦 Uploading to PyPI..."
|
||||||
twine upload dist/*
|
twine upload dist/*
|
||||||
echo "✅ Package uploaded to https://pypi.org/project/crawl4ai/"
|
echo "✅ Package uploaded to https://pypi.org/project/crawl4ai/"
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.DOCKER_USERNAME }}
|
|
||||||
password: ${{ secrets.DOCKER_TOKEN }}
|
|
||||||
|
|
||||||
- name: Extract major and minor versions
|
|
||||||
id: versions
|
|
||||||
run: |
|
|
||||||
VERSION=${{ steps.get_version.outputs.VERSION }}
|
|
||||||
MAJOR=$(echo $VERSION | cut -d. -f1)
|
|
||||||
MINOR=$(echo $VERSION | cut -d. -f1-2)
|
|
||||||
echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
|
|
||||||
echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
- name: Build and push Docker images
|
|
||||||
uses: docker/build-push-action@v5
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
push: true
|
|
||||||
tags: |
|
|
||||||
unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
|
|
||||||
unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}
|
|
||||||
unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}
|
|
||||||
unclecode/crawl4ai:latest
|
|
||||||
platforms: linux/amd64,linux/arm64
|
|
||||||
|
|
||||||
- name: Create GitHub Release
|
- name: Create GitHub Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v2
|
||||||
with:
|
with:
|
||||||
@@ -103,26 +73,29 @@ jobs:
|
|||||||
name: Release v${{ steps.get_version.outputs.VERSION }}
|
name: Release v${{ steps.get_version.outputs.VERSION }}
|
||||||
body: |
|
body: |
|
||||||
## 🎉 Crawl4AI v${{ steps.get_version.outputs.VERSION }} Released!
|
## 🎉 Crawl4AI v${{ steps.get_version.outputs.VERSION }} Released!
|
||||||
|
|
||||||
### 📦 Installation
|
### 📦 Installation
|
||||||
|
|
||||||
**PyPI:**
|
**PyPI:**
|
||||||
```bash
|
```bash
|
||||||
pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}
|
pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}
|
||||||
```
|
```
|
||||||
|
|
||||||
**Docker:**
|
**Docker:**
|
||||||
```bash
|
```bash
|
||||||
docker pull unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
|
docker pull unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
|
||||||
docker pull unclecode/crawl4ai:latest
|
docker pull unclecode/crawl4ai:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Note:** Docker images are being built and will be available shortly.
|
||||||
|
Check the [Docker Release workflow](https://github.com/${{ github.repository }}/actions/workflows/docker-release.yml) for build status.
|
||||||
|
|
||||||
### 📝 What's Changed
|
### 📝 What's Changed
|
||||||
See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details.
|
See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details.
|
||||||
draft: false
|
draft: false
|
||||||
prerelease: false
|
prerelease: false
|
||||||
token: ${{ secrets.GITHUB_TOKEN }}
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Summary
|
- name: Summary
|
||||||
run: |
|
run: |
|
||||||
echo "## 🚀 Release Complete!" >> $GITHUB_STEP_SUMMARY
|
echo "## 🚀 Release Complete!" >> $GITHUB_STEP_SUMMARY
|
||||||
@@ -132,11 +105,9 @@ jobs:
|
|||||||
echo "- URL: https://pypi.org/project/crawl4ai/" >> $GITHUB_STEP_SUMMARY
|
echo "- URL: https://pypi.org/project/crawl4ai/" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- Install: \`pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
|
echo "- Install: \`pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "- \`unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "- \`unclecode/crawl4ai:latest\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "### 📋 GitHub Release" >> $GITHUB_STEP_SUMMARY
|
echo "### 📋 GitHub Release" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "https://github.com/${{ github.repository }}/releases/tag/v${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
echo "- https://github.com/${{ github.repository }}/releases/tag/v${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "Docker images are being built in a separate workflow." >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "Check: https://github.com/${{ github.repository }}/actions/workflows/docker-release.yml" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|||||||
142
.github/workflows/release.yml.backup
vendored
Normal file
142
.github/workflows/release.yml.backup
vendored
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
name: Release Pipeline
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- 'v*'
|
||||||
|
- '!test-v*' # Exclude test tags
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
release:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: write # Required for creating releases
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.12'
|
||||||
|
|
||||||
|
- name: Extract version from tag
|
||||||
|
id: get_version
|
||||||
|
run: |
|
||||||
|
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||||
|
echo "VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||||
|
echo "Releasing version: $TAG_VERSION"
|
||||||
|
|
||||||
|
- name: Install package dependencies
|
||||||
|
run: |
|
||||||
|
pip install -e .
|
||||||
|
|
||||||
|
- name: Check version consistency
|
||||||
|
run: |
|
||||||
|
TAG_VERSION=${{ steps.get_version.outputs.VERSION }}
|
||||||
|
PACKAGE_VERSION=$(python -c "from crawl4ai.__version__ import __version__; print(__version__)")
|
||||||
|
|
||||||
|
echo "Tag version: $TAG_VERSION"
|
||||||
|
echo "Package version: $PACKAGE_VERSION"
|
||||||
|
|
||||||
|
if [ "$TAG_VERSION" != "$PACKAGE_VERSION" ]; then
|
||||||
|
echo "❌ Version mismatch! Tag: $TAG_VERSION, Package: $PACKAGE_VERSION"
|
||||||
|
echo "Please update crawl4ai/__version__.py to match the tag version"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "✅ Version check passed: $TAG_VERSION"
|
||||||
|
|
||||||
|
- name: Install build dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install build twine
|
||||||
|
|
||||||
|
- name: Build package
|
||||||
|
run: python -m build
|
||||||
|
|
||||||
|
- name: Check package
|
||||||
|
run: twine check dist/*
|
||||||
|
|
||||||
|
- name: Upload to PyPI
|
||||||
|
env:
|
||||||
|
TWINE_USERNAME: __token__
|
||||||
|
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
||||||
|
run: |
|
||||||
|
echo "📦 Uploading to PyPI..."
|
||||||
|
twine upload dist/*
|
||||||
|
echo "✅ Package uploaded to https://pypi.org/project/crawl4ai/"
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Log in to Docker Hub
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
username: ${{ secrets.DOCKER_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKER_TOKEN }}
|
||||||
|
|
||||||
|
- name: Extract major and minor versions
|
||||||
|
id: versions
|
||||||
|
run: |
|
||||||
|
VERSION=${{ steps.get_version.outputs.VERSION }}
|
||||||
|
MAJOR=$(echo $VERSION | cut -d. -f1)
|
||||||
|
MINOR=$(echo $VERSION | cut -d. -f1-2)
|
||||||
|
echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
|
||||||
|
echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Build and push Docker images
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
tags: |
|
||||||
|
unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
|
||||||
|
unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}
|
||||||
|
unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}
|
||||||
|
unclecode/crawl4ai:latest
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
|
|
||||||
|
- name: Create GitHub Release
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
|
with:
|
||||||
|
tag_name: v${{ steps.get_version.outputs.VERSION }}
|
||||||
|
name: Release v${{ steps.get_version.outputs.VERSION }}
|
||||||
|
body: |
|
||||||
|
## 🎉 Crawl4AI v${{ steps.get_version.outputs.VERSION }} Released!
|
||||||
|
|
||||||
|
### 📦 Installation
|
||||||
|
|
||||||
|
**PyPI:**
|
||||||
|
```bash
|
||||||
|
pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Docker:**
|
||||||
|
```bash
|
||||||
|
docker pull unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
|
||||||
|
docker pull unclecode/crawl4ai:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 📝 What's Changed
|
||||||
|
See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details.
|
||||||
|
draft: false
|
||||||
|
prerelease: false
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Summary
|
||||||
|
run: |
|
||||||
|
echo "## 🚀 Release Complete!" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### 📦 PyPI Package" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- Version: ${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- URL: https://pypi.org/project/crawl4ai/" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- Install: \`pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- \`unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}\`" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}\`" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "- \`unclecode/crawl4ai:latest\`" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### 📋 GitHub Release" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "https://github.com/${{ github.repository }}/releases/tag/v${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
|
||||||
13
.gitignore
vendored
13
.gitignore
vendored
@@ -1,6 +1,13 @@
|
|||||||
# Scripts folder (private tools)
|
# Scripts folder (private tools)
|
||||||
.scripts/
|
.scripts/
|
||||||
|
|
||||||
|
# Database files
|
||||||
|
*.db
|
||||||
|
|
||||||
|
# Environment files
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
@@ -259,6 +266,8 @@ continue_config.json
|
|||||||
.llm.env
|
.llm.env
|
||||||
.private/
|
.private/
|
||||||
|
|
||||||
|
.claude/
|
||||||
|
|
||||||
CLAUDE_MONITOR.md
|
CLAUDE_MONITOR.md
|
||||||
CLAUDE.md
|
CLAUDE.md
|
||||||
|
|
||||||
@@ -270,4 +279,6 @@ docs/**/data
|
|||||||
.codecat/
|
.codecat/
|
||||||
|
|
||||||
docs/apps/linkdin/debug*/
|
docs/apps/linkdin/debug*/
|
||||||
docs/apps/linkdin/samples/insights/*
|
docs/apps/linkdin/samples/insights/*
|
||||||
|
|
||||||
|
scripts/
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
FROM python:3.12-slim-bookworm AS build
|
FROM python:3.12-slim-bookworm AS build
|
||||||
|
|
||||||
# C4ai version
|
# C4ai version
|
||||||
ARG C4AI_VER=0.7.0-r1
|
ARG C4AI_VER=0.7.6
|
||||||
ENV C4AI_VERSION=$C4AI_VER
|
ENV C4AI_VERSION=$C4AI_VER
|
||||||
LABEL c4ai.version=$C4AI_VER
|
LABEL c4ai.version=$C4AI_VER
|
||||||
|
|
||||||
|
|||||||
88
README.md
88
README.md
@@ -27,11 +27,13 @@
|
|||||||
|
|
||||||
Crawl4AI turns the web into clean, LLM ready Markdown for RAG, agents, and data pipelines. Fast, controllable, battle tested by a 50k+ star community.
|
Crawl4AI turns the web into clean, LLM ready Markdown for RAG, agents, and data pipelines. Fast, controllable, battle tested by a 50k+ star community.
|
||||||
|
|
||||||
[✨ Check out latest update v0.7.4](#-recent-updates)
|
[✨ Check out latest update v0.7.6](#-recent-updates)
|
||||||
|
|
||||||
✨ New in v0.7.4: Revolutionary LLM Table Extraction with intelligent chunking, enhanced concurrency fixes, memory management refactor, and critical stability improvements. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
|
✨ **New in v0.7.6**: Complete Webhook Infrastructure for Docker Job Queue API! Real-time notifications for both `/crawl/job` and `/llm/job` endpoints with exponential backoff retry, custom headers, and flexible delivery modes. No more polling! [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.6.md)
|
||||||
|
|
||||||
✨ Recent v0.7.3: Undetected Browser Support, Multi-URL Configurations, Memory Monitoring, Enhanced Table Extraction, GitHub Sponsors. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.3.md)
|
✨ Recent v0.7.5: Docker Hooks System with function-based API for pipeline customization, Enhanced LLM Integration with custom providers, HTTPS Preservation, and multiple community-reported bug fixes. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.5.md)
|
||||||
|
|
||||||
|
✨ Previous v0.7.4: Revolutionary LLM Table Extraction with intelligent chunking, enhanced concurrency fixes, memory management refactor, and critical stability improvements. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary>🤓 <strong>My Personal Story</strong></summary>
|
<summary>🤓 <strong>My Personal Story</strong></summary>
|
||||||
@@ -177,7 +179,7 @@ No rate-limited APIs. No lock-in. Build and own your data pipeline with direct g
|
|||||||
- 📸 **Screenshots**: Capture page screenshots during crawling for debugging or analysis.
|
- 📸 **Screenshots**: Capture page screenshots during crawling for debugging or analysis.
|
||||||
- 📂 **Raw Data Crawling**: Directly process raw HTML (`raw:`) or local files (`file://`).
|
- 📂 **Raw Data Crawling**: Directly process raw HTML (`raw:`) or local files (`file://`).
|
||||||
- 🔗 **Comprehensive Link Extraction**: Extracts internal, external links, and embedded iframe content.
|
- 🔗 **Comprehensive Link Extraction**: Extracts internal, external links, and embedded iframe content.
|
||||||
- 🛠️ **Customizable Hooks**: Define hooks at every step to customize crawling behavior.
|
- 🛠️ **Customizable Hooks**: Define hooks at every step to customize crawling behavior (supports both string and function-based APIs).
|
||||||
- 💾 **Caching**: Cache data for improved speed and to avoid redundant fetches.
|
- 💾 **Caching**: Cache data for improved speed and to avoid redundant fetches.
|
||||||
- 📄 **Metadata Extraction**: Retrieve structured metadata from web pages.
|
- 📄 **Metadata Extraction**: Retrieve structured metadata from web pages.
|
||||||
- 📡 **IFrame Content Extraction**: Seamless extraction from embedded iframe content.
|
- 📡 **IFrame Content Extraction**: Seamless extraction from embedded iframe content.
|
||||||
@@ -544,6 +546,54 @@ async def test_news_crawl():
|
|||||||
|
|
||||||
## ✨ Recent Updates
|
## ✨ Recent Updates
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><strong>Version 0.7.5 Release Highlights - The Docker Hooks & Security Update</strong></summary>
|
||||||
|
|
||||||
|
- **🔧 Docker Hooks System**: Complete pipeline customization with user-provided Python functions at 8 key points
|
||||||
|
- **✨ Function-Based Hooks API (NEW)**: Write hooks as regular Python functions with full IDE support:
|
||||||
|
```python
|
||||||
|
from crawl4ai import hooks_to_string
|
||||||
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
# Define hooks as regular Python functions
|
||||||
|
async def on_page_context_created(page, context, **kwargs):
|
||||||
|
"""Block images to speed up crawling"""
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
await page.set_viewport_size({"width": 1920, "height": 1080})
|
||||||
|
return page
|
||||||
|
|
||||||
|
async def before_goto(page, context, url, **kwargs):
|
||||||
|
"""Add custom headers"""
|
||||||
|
await page.set_extra_http_headers({'X-Crawl4AI': 'v0.7.5'})
|
||||||
|
return page
|
||||||
|
|
||||||
|
# Option 1: Use hooks_to_string() utility for REST API
|
||||||
|
hooks_code = hooks_to_string({
|
||||||
|
"on_page_context_created": on_page_context_created,
|
||||||
|
"before_goto": before_goto
|
||||||
|
})
|
||||||
|
|
||||||
|
# Option 2: Docker client with automatic conversion (Recommended)
|
||||||
|
client = Crawl4aiDockerClient(base_url="http://localhost:11235")
|
||||||
|
results = await client.crawl(
|
||||||
|
urls=["https://httpbin.org/html"],
|
||||||
|
hooks={
|
||||||
|
"on_page_context_created": on_page_context_created,
|
||||||
|
"before_goto": before_goto
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# ✓ Full IDE support, type checking, and reusability!
|
||||||
|
```
|
||||||
|
|
||||||
|
- **🤖 Enhanced LLM Integration**: Custom providers with temperature control and base_url configuration
|
||||||
|
- **🔒 HTTPS Preservation**: Secure internal link handling with `preserve_https_for_internal_links=True`
|
||||||
|
- **🐍 Python 3.10+ Support**: Modern language features and enhanced performance
|
||||||
|
- **🛠️ Bug Fixes**: Resolved multiple community-reported issues including URL processing, JWT authentication, and proxy configuration
|
||||||
|
|
||||||
|
[Full v0.7.5 Release Notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.5.md)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary><strong>Version 0.7.4 Release Highlights - The Intelligent Table Extraction & Performance Update</strong></summary>
|
<summary><strong>Version 0.7.4 Release Highlights - The Intelligent Table Extraction & Performance Update</strong></summary>
|
||||||
|
|
||||||
@@ -919,6 +969,36 @@ We envision a future where AI is powered by real human knowledge, ensuring data
|
|||||||
For more details, see our [full mission statement](./MISSION.md).
|
For more details, see our [full mission statement](./MISSION.md).
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
## 🌟 Current Sponsors
|
||||||
|
|
||||||
|
### 🏢 Enterprise Sponsors & Partners
|
||||||
|
|
||||||
|
Our enterprise sponsors and technology partners help scale Crawl4AI to power production-grade data pipelines.
|
||||||
|
|
||||||
|
| Company | About | Sponsorship Tier |
|
||||||
|
|------|------|----------------------------|
|
||||||
|
| <a href="https://dashboard.capsolver.com/passport/register?inviteCode=ESVSECTX5Q23" target="_blank"><picture><source width="120" media="(prefers-color-scheme: dark)" srcset="https://docs.crawl4ai.com/uploads/sponsors/20251013045338_72a71fa4ee4d2f40.png"><source width="120" media="(prefers-color-scheme: light)" srcset="https://www.capsolver.com/assets/images/logo-text.png"><img alt="Capsolver" src="https://www.capsolver.com/assets/images/logo-text.png"></picture></a> | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥈 Silver |
|
||||||
|
| <a href="https://kipo.ai" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045751_2d54f57f117c651e.png" alt="DataSync" width="120"/></a> | Helps engineers and buyers find, compare, and source electronic & industrial parts in seconds, with specs, pricing, lead times & alternatives.| 🥇 Gold |
|
||||||
|
| <a href="https://www.kidocode.com/" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045045_bb8dace3f0440d65.svg" alt="Kidocode" width="120"/><p align="center">KidoCode</p></a> | Kidocode is a hybrid technology and entrepreneurship school for kids aged 5–18, offering both online and on-campus education. | 🥇 Gold |
|
||||||
|
| <a href="https://www.alephnull.sg/" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013050323_a9e8e8c4c3650421.svg" alt="Aleph null" width="120"/></a> | Singapore-based Aleph Null is Asia’s leading edtech hub, dedicated to student-centric, AI-driven education—empowering learners with the tools to thrive in a fast-changing world. | 🥇 Gold |
|
||||||
|
|
||||||
|
### 🧑🤝 Individual Sponsors
|
||||||
|
|
||||||
|
A heartfelt thanks to our individual supporters! Every contribution helps us keep our open-source mission alive and thriving!
|
||||||
|
|
||||||
|
<p align="left">
|
||||||
|
<a href="https://github.com/hafezparast"><img src="https://avatars.githubusercontent.com/u/14273305?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/ntohidi"><img src="https://avatars.githubusercontent.com/u/17140097?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/Sjoeborg"><img src="https://avatars.githubusercontent.com/u/17451310?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/romek-rozen"><img src="https://avatars.githubusercontent.com/u/30595969?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/Kourosh-Kiyani"><img src="https://avatars.githubusercontent.com/u/34105600?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/Etherdrake"><img src="https://avatars.githubusercontent.com/u/67021215?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/shaman247"><img src="https://avatars.githubusercontent.com/u/211010067?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
<a href="https://github.com/work-flow-manager"><img src="https://avatars.githubusercontent.com/u/217665461?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
> Want to join them? [Sponsor Crawl4AI →](https://github.com/sponsors/unclecode)
|
||||||
|
|
||||||
## Star History
|
## Star History
|
||||||
|
|
||||||
[![Star History Chart](https://api.star-history.com/svg?repos=unclecode/crawl4ai&type=Date)](https://star-history.com/#unclecode/crawl4ai&Date)
|
[![Star History Chart](https://api.star-history.com/svg?repos=unclecode/crawl4ai&type=Date)](https://star-history.com/#unclecode/crawl4ai&Date)
|
||||||
|
|||||||
@@ -103,7 +103,8 @@ from .browser_adapter import (
|
|||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
start_colab_display_server,
|
start_colab_display_server,
|
||||||
setup_colab_environment
|
setup_colab_environment,
|
||||||
|
hooks_to_string
|
||||||
)
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
@@ -183,6 +184,7 @@ __all__ = [
|
|||||||
"ProxyConfig",
|
"ProxyConfig",
|
||||||
"start_colab_display_server",
|
"start_colab_display_server",
|
||||||
"setup_colab_environment",
|
"setup_colab_environment",
|
||||||
|
"hooks_to_string",
|
||||||
# C4A Script additions
|
# C4A Script additions
|
||||||
"c4a_compile",
|
"c4a_compile",
|
||||||
"c4a_validate",
|
"c4a_validate",
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# crawl4ai/__version__.py
|
# crawl4ai/__version__.py
|
||||||
|
|
||||||
# This is the version that will be used for stable releases
|
# This is the version that will be used for stable releases
|
||||||
__version__ = "0.7.4"
|
__version__ = "0.7.6"
|
||||||
|
|
||||||
# For nightly builds, this gets set during build process
|
# For nightly builds, this gets set during build process
|
||||||
__nightly_version__ = None
|
__nightly_version__ = None
|
||||||
|
|||||||
@@ -455,8 +455,6 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
|
|||||||
|
|
||||||
# Update priorities for waiting tasks if needed
|
# Update priorities for waiting tasks if needed
|
||||||
await self._update_queue_priorities()
|
await self._update_queue_priorities()
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.monitor:
|
if self.monitor:
|
||||||
@@ -467,6 +465,7 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
|
|||||||
memory_monitor.cancel()
|
memory_monitor.cancel()
|
||||||
if self.monitor:
|
if self.monitor:
|
||||||
self.monitor.stop()
|
self.monitor.stop()
|
||||||
|
return results
|
||||||
|
|
||||||
async def _update_queue_priorities(self):
|
async def _update_queue_priorities(self):
|
||||||
"""Periodically update priorities of items in the queue to prevent starvation"""
|
"""Periodically update priorities of items in the queue to prevent starvation"""
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import List, Optional, Union, AsyncGenerator, Dict, Any
|
from typing import List, Optional, Union, AsyncGenerator, Dict, Any, Callable
|
||||||
import httpx
|
import httpx
|
||||||
import json
|
import json
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
@@ -7,6 +7,7 @@ import asyncio
|
|||||||
from .async_configs import BrowserConfig, CrawlerRunConfig
|
from .async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
from .models import CrawlResult
|
from .models import CrawlResult
|
||||||
from .async_logger import AsyncLogger, LogLevel
|
from .async_logger import AsyncLogger, LogLevel
|
||||||
|
from .utils import hooks_to_string
|
||||||
|
|
||||||
|
|
||||||
class Crawl4aiClientError(Exception):
|
class Crawl4aiClientError(Exception):
|
||||||
@@ -70,17 +71,41 @@ class Crawl4aiDockerClient:
|
|||||||
self.logger.error(f"Server unreachable: {str(e)}", tag="ERROR")
|
self.logger.error(f"Server unreachable: {str(e)}", tag="ERROR")
|
||||||
raise ConnectionError(f"Cannot connect to server: {str(e)}")
|
raise ConnectionError(f"Cannot connect to server: {str(e)}")
|
||||||
|
|
||||||
def _prepare_request(self, urls: List[str], browser_config: Optional[BrowserConfig] = None,
|
def _prepare_request(
|
||||||
crawler_config: Optional[CrawlerRunConfig] = None) -> Dict[str, Any]:
|
self,
|
||||||
|
urls: List[str],
|
||||||
|
browser_config: Optional[BrowserConfig] = None,
|
||||||
|
crawler_config: Optional[CrawlerRunConfig] = None,
|
||||||
|
hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None,
|
||||||
|
hooks_timeout: int = 30
|
||||||
|
) -> Dict[str, Any]:
|
||||||
"""Prepare request data from configs."""
|
"""Prepare request data from configs."""
|
||||||
if self._token:
|
if self._token:
|
||||||
self._http_client.headers["Authorization"] = f"Bearer {self._token}"
|
self._http_client.headers["Authorization"] = f"Bearer {self._token}"
|
||||||
return {
|
|
||||||
|
request_data = {
|
||||||
"urls": urls,
|
"urls": urls,
|
||||||
"browser_config": browser_config.dump() if browser_config else {},
|
"browser_config": browser_config.dump() if browser_config else {},
|
||||||
"crawler_config": crawler_config.dump() if crawler_config else {}
|
"crawler_config": crawler_config.dump() if crawler_config else {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Handle hooks if provided
|
||||||
|
if hooks:
|
||||||
|
# Check if hooks are already strings or need conversion
|
||||||
|
if any(callable(v) for v in hooks.values()):
|
||||||
|
# Convert function objects to strings
|
||||||
|
hooks_code = hooks_to_string(hooks)
|
||||||
|
else:
|
||||||
|
# Already in string format
|
||||||
|
hooks_code = hooks
|
||||||
|
|
||||||
|
request_data["hooks"] = {
|
||||||
|
"code": hooks_code,
|
||||||
|
"timeout": hooks_timeout
|
||||||
|
}
|
||||||
|
|
||||||
|
return request_data
|
||||||
|
|
||||||
async def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
|
async def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
|
||||||
"""Make an HTTP request with error handling."""
|
"""Make an HTTP request with error handling."""
|
||||||
url = urljoin(self.base_url, endpoint)
|
url = urljoin(self.base_url, endpoint)
|
||||||
@@ -102,16 +127,42 @@ class Crawl4aiDockerClient:
|
|||||||
self,
|
self,
|
||||||
urls: List[str],
|
urls: List[str],
|
||||||
browser_config: Optional[BrowserConfig] = None,
|
browser_config: Optional[BrowserConfig] = None,
|
||||||
crawler_config: Optional[CrawlerRunConfig] = None
|
crawler_config: Optional[CrawlerRunConfig] = None,
|
||||||
|
hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None,
|
||||||
|
hooks_timeout: int = 30
|
||||||
) -> Union[CrawlResult, List[CrawlResult], AsyncGenerator[CrawlResult, None]]:
|
) -> Union[CrawlResult, List[CrawlResult], AsyncGenerator[CrawlResult, None]]:
|
||||||
"""Execute a crawl operation."""
|
"""
|
||||||
|
Execute a crawl operation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
urls: List of URLs to crawl
|
||||||
|
browser_config: Browser configuration
|
||||||
|
crawler_config: Crawler configuration
|
||||||
|
hooks: Optional hooks - can be either:
|
||||||
|
- Dict[str, Callable]: Function objects that will be converted to strings
|
||||||
|
- Dict[str, str]: Already stringified hook code
|
||||||
|
hooks_timeout: Timeout in seconds for each hook execution (1-120)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Single CrawlResult, list of results, or async generator for streaming
|
||||||
|
|
||||||
|
Example with function hooks:
|
||||||
|
>>> async def my_hook(page, context, **kwargs):
|
||||||
|
... await page.set_viewport_size({"width": 1920, "height": 1080})
|
||||||
|
... return page
|
||||||
|
>>>
|
||||||
|
>>> result = await client.crawl(
|
||||||
|
... ["https://example.com"],
|
||||||
|
... hooks={"on_page_context_created": my_hook}
|
||||||
|
... )
|
||||||
|
"""
|
||||||
await self._check_server()
|
await self._check_server()
|
||||||
|
|
||||||
data = self._prepare_request(urls, browser_config, crawler_config)
|
data = self._prepare_request(urls, browser_config, crawler_config, hooks, hooks_timeout)
|
||||||
is_streaming = crawler_config and crawler_config.stream
|
is_streaming = crawler_config and crawler_config.stream
|
||||||
|
|
||||||
self.logger.info(f"Crawling {len(urls)} URLs {'(streaming)' if is_streaming else ''}", tag="CRAWL")
|
self.logger.info(f"Crawling {len(urls)} URLs {'(streaming)' if is_streaming else ''}", tag="CRAWL")
|
||||||
|
|
||||||
if is_streaming:
|
if is_streaming:
|
||||||
async def stream_results() -> AsyncGenerator[CrawlResult, None]:
|
async def stream_results() -> AsyncGenerator[CrawlResult, None]:
|
||||||
async with self._http_client.stream("POST", f"{self.base_url}/crawl/stream", json=data) as response:
|
async with self._http_client.stream("POST", f"{self.base_url}/crawl/stream", json=data) as response:
|
||||||
@@ -128,12 +179,12 @@ class Crawl4aiDockerClient:
|
|||||||
else:
|
else:
|
||||||
yield CrawlResult(**result)
|
yield CrawlResult(**result)
|
||||||
return stream_results()
|
return stream_results()
|
||||||
|
|
||||||
response = await self._request("POST", "/crawl", json=data)
|
response = await self._request("POST", "/crawl", json=data)
|
||||||
result_data = response.json()
|
result_data = response.json()
|
||||||
if not result_data.get("success", False):
|
if not result_data.get("success", False):
|
||||||
raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}")
|
raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}")
|
||||||
|
|
||||||
results = [CrawlResult(**r) for r in result_data.get("results", [])]
|
results = [CrawlResult(**r) for r in result_data.get("results", [])]
|
||||||
self.logger.success(f"Crawl completed with {len(results)} results", tag="CRAWL")
|
self.logger.success(f"Crawl completed with {len(results)} results", tag="CRAWL")
|
||||||
return results[0] if len(results) == 1 else results
|
return results[0] if len(results) == 1 else results
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ from urllib.parse import (
|
|||||||
urljoin, urlparse, urlunparse,
|
urljoin, urlparse, urlunparse,
|
||||||
parse_qsl, urlencode, quote, unquote
|
parse_qsl, urlencode, quote, unquote
|
||||||
)
|
)
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
|
||||||
# Monkey patch to fix wildcard handling in urllib.robotparser
|
# Monkey patch to fix wildcard handling in urllib.robotparser
|
||||||
@@ -3529,4 +3530,52 @@ def get_memory_stats() -> Tuple[float, float, float]:
|
|||||||
available_gb = get_true_available_memory_gb()
|
available_gb = get_true_available_memory_gb()
|
||||||
used_percent = get_true_memory_usage_percent()
|
used_percent = get_true_memory_usage_percent()
|
||||||
|
|
||||||
return used_percent, available_gb, total_gb
|
return used_percent, available_gb, total_gb
|
||||||
|
|
||||||
|
|
||||||
|
# Hook utilities for Docker API
|
||||||
|
def hooks_to_string(hooks: Dict[str, Callable]) -> Dict[str, str]:
    """
    Convert hook function objects to source-code strings for the Docker API.

    The Docker API accepts hooks as strings of Python source. This helper lets
    callers write hooks as ordinary functions and converts each one to its
    dedented source text via ``inspect.getsource``.

    Args:
        hooks: Dictionary mapping hook point names to Python function objects.
            Functions should be async and follow hook signature requirements.

    Returns:
        Dictionary mapping each hook point name to the dedented source code of
        the corresponding function. An empty input yields an empty dictionary.

    Raises:
        ValueError: If a hook is not callable, or if its source cannot be
            extracted (for example a builtin, or a function defined
            interactively). The underlying ``OSError``/``TypeError`` is
            attached as ``__cause__``.

    Example:
        >>> async def my_hook(page, context, **kwargs):
        ...     await page.set_viewport_size({"width": 1920, "height": 1080})
        ...     return page
        >>>
        >>> hooks_dict = {"on_page_context_created": my_hook}
        >>> api_hooks = hooks_to_string(hooks_dict)
        >>> # api_hooks is now ready to use with Docker API
    """
    # Imported here so this helper works even if the module does not import
    # textwrap at top level (only `inspect` is visibly imported alongside it).
    import textwrap

    result: Dict[str, str] = {}

    for hook_name, hook_func in hooks.items():
        if not callable(hook_func):
            raise ValueError(f"Hook '{hook_name}' must be a callable function, got {type(hook_func)}")

        # Keep the try body minimal: only getsource can raise OSError/TypeError.
        try:
            source = inspect.getsource(hook_func)
        except (OSError, TypeError) as e:
            # Chain the original error so the root cause stays in the traceback.
            raise ValueError(
                f"Cannot extract source code for hook '{hook_name}'. "
                f"Make sure the function is defined in a file (not interactively). Error: {e}"
            ) from e

        # Nested functions and methods come back indented; strip the common
        # leading indentation so the source stands alone at column 0.
        result[hook_name] = textwrap.dedent(source)

    return result
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
- [Python SDK](#python-sdk)
|
- [Python SDK](#python-sdk)
|
||||||
- [Understanding Request Schema](#understanding-request-schema)
|
- [Understanding Request Schema](#understanding-request-schema)
|
||||||
- [REST API Examples](#rest-api-examples)
|
- [REST API Examples](#rest-api-examples)
|
||||||
|
- [Asynchronous Jobs with Webhooks](#asynchronous-jobs-with-webhooks)
|
||||||
- [Additional API Endpoints](#additional-api-endpoints)
|
- [Additional API Endpoints](#additional-api-endpoints)
|
||||||
- [HTML Extraction Endpoint](#html-extraction-endpoint)
|
- [HTML Extraction Endpoint](#html-extraction-endpoint)
|
||||||
- [Screenshot Endpoint](#screenshot-endpoint)
|
- [Screenshot Endpoint](#screenshot-endpoint)
|
||||||
@@ -58,15 +59,13 @@ Pull and run images directly from Docker Hub without building locally.
|
|||||||
|
|
||||||
#### 1. Pull the Image
|
#### 1. Pull the Image
|
||||||
|
|
||||||
Our latest release candidate is `0.7.0-r1`. Images are built with multi-arch manifests, so Docker automatically pulls the correct version for your system.
|
Our latest stable release is `0.7.6`. Images are built with multi-arch manifests, so Docker automatically pulls the correct version for your system.
|
||||||
|
|
||||||
> ⚠️ **Important Note**: The `latest` tag currently points to the stable `0.6.0` version. After testing and validation, `0.7.0` (without -r1) will be released and `latest` will be updated. For now, please use `0.7.0-r1` to test the new features.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Pull the release candidate (for testing new features)
|
# Pull the latest stable version (0.7.6)
|
||||||
docker pull unclecode/crawl4ai:0.7.0-r1
|
docker pull unclecode/crawl4ai:0.7.6
|
||||||
|
|
||||||
# Or pull the current stable version (0.6.0)
|
# Or use the latest tag (points to 0.7.6)
|
||||||
docker pull unclecode/crawl4ai:latest
|
docker pull unclecode/crawl4ai:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -101,7 +100,7 @@ EOL
|
|||||||
-p 11235:11235 \
|
-p 11235:11235 \
|
||||||
--name crawl4ai \
|
--name crawl4ai \
|
||||||
--shm-size=1g \
|
--shm-size=1g \
|
||||||
unclecode/crawl4ai:0.7.0-r1
|
unclecode/crawl4ai:0.7.6
|
||||||
```
|
```
|
||||||
|
|
||||||
* **With LLM support:**
|
* **With LLM support:**
|
||||||
@@ -112,7 +111,7 @@ EOL
|
|||||||
--name crawl4ai \
|
--name crawl4ai \
|
||||||
--env-file .llm.env \
|
--env-file .llm.env \
|
||||||
--shm-size=1g \
|
--shm-size=1g \
|
||||||
unclecode/crawl4ai:0.7.0-r1
|
unclecode/crawl4ai:0.7.6
|
||||||
```
|
```
|
||||||
|
|
||||||
> The server will be available at `http://localhost:11235`. Visit `/playground` to access the interactive testing interface.
|
> The server will be available at `http://localhost:11235`. Visit `/playground` to access the interactive testing interface.
|
||||||
@@ -185,7 +184,7 @@ The `docker-compose.yml` file in the project root provides a simplified approach
|
|||||||
```bash
|
```bash
|
||||||
# Pulls and runs the release candidate from Docker Hub
|
# Pulls and runs the latest stable release from Docker Hub
|
||||||
# Automatically selects the correct architecture
|
# Automatically selects the correct architecture
|
||||||
IMAGE=unclecode/crawl4ai:0.7.0-r1 docker compose up -d
|
IMAGE=unclecode/crawl4ai:0.7.6 docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
* **Build and Run Locally:**
|
* **Build and Run Locally:**
|
||||||
@@ -648,6 +647,194 @@ async def test_stream_crawl(token: str = None): # Made token optional
|
|||||||
# asyncio.run(test_stream_crawl())
|
# asyncio.run(test_stream_crawl())
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Asynchronous Jobs with Webhooks
|
||||||
|
|
||||||
|
For long-running crawls or when you want to avoid keeping connections open, use the job queue endpoints. Instead of polling for results, configure a webhook to receive notifications when jobs complete.
|
||||||
|
|
||||||
|
#### Why Use Jobs & Webhooks?
|
||||||
|
|
||||||
|
- **No Polling Required** - Get notified when crawls complete instead of constantly checking status
|
||||||
|
- **Better Resource Usage** - Free up client connections while jobs run in the background
|
||||||
|
- **Scalable Architecture** - Ideal for high-volume crawling with TypeScript/Node.js clients or microservices
|
||||||
|
- **Reliable Delivery** - Automatic retry with exponential backoff (5 attempts: 1s → 2s → 4s → 8s → 16s)
|
||||||
|
|
||||||
|
#### How It Works
|
||||||
|
|
||||||
|
1. **Submit Job** → POST to `/crawl/job` with optional `webhook_config`
|
||||||
|
2. **Get Task ID** → Receive a `task_id` immediately
|
||||||
|
3. **Job Runs** → Crawl executes in the background
|
||||||
|
4. **Webhook Fired** → Server POSTs completion notification to your webhook URL
|
||||||
|
5. **Fetch Results** → If data wasn't included in webhook, GET `/crawl/job/{task_id}`
|
||||||
|
|
||||||
|
#### Quick Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Submit a crawl job with webhook notification
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": false
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Response: {"task_id": "crawl_a1b2c3d4"}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Your webhook receives:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_a1b2c3d4",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-21T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Then fetch the results:
|
||||||
|
```bash
|
||||||
|
curl http://localhost:11235/crawl/job/crawl_a1b2c3d4
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Include Data in Webhook
|
||||||
|
|
||||||
|
Set `webhook_data_in_payload: true` to receive the full crawl results directly in the webhook:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": true
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Your webhook receives the complete data:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_a1b2c3d4",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-21T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"data": {
|
||||||
|
"markdown": "...",
|
||||||
|
"html": "...",
|
||||||
|
"links": {...},
|
||||||
|
"metadata": {...}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Webhook Authentication
|
||||||
|
|
||||||
|
Add custom headers for authentication:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl",
|
||||||
|
"webhook_data_in_payload": false,
|
||||||
|
"webhook_headers": {
|
||||||
|
"X-Webhook-Secret": "your-secret-token",
|
||||||
|
"X-Service-ID": "crawl4ai-prod"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Global Default Webhook
|
||||||
|
|
||||||
|
Configure a default webhook URL in `config.yml` for all jobs:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
webhooks:
|
||||||
|
enabled: true
|
||||||
|
default_url: "https://myapp.com/webhooks/default"
|
||||||
|
data_in_payload: false
|
||||||
|
retry:
|
||||||
|
max_attempts: 5
|
||||||
|
initial_delay_ms: 1000
|
||||||
|
max_delay_ms: 32000
|
||||||
|
timeout_ms: 30000
|
||||||
|
```
|
||||||
|
|
||||||
|
Now jobs without `webhook_config` automatically use the default webhook.
|
||||||
|
|
||||||
|
#### Job Status Polling (Without Webhooks)
|
||||||
|
|
||||||
|
If you prefer polling instead of webhooks, just omit `webhook_config`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Submit job
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"urls": ["https://example.com"]}'
|
||||||
|
# Response: {"task_id": "crawl_xyz"}
|
||||||
|
|
||||||
|
# Poll for status
|
||||||
|
curl http://localhost:11235/crawl/job/crawl_xyz
|
||||||
|
```
|
||||||
|
|
||||||
|
The response includes a `status` field: `"processing"`, `"completed"`, or `"failed"`.
|
||||||
|
|
||||||
|
#### LLM Extraction Jobs with Webhooks
|
||||||
|
|
||||||
|
The same webhook system works for LLM extraction jobs via `/llm/job`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Submit LLM extraction job with webhook
|
||||||
|
curl -X POST http://localhost:11235/llm/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/article",
|
||||||
|
"q": "Extract the article title, author, and main points",
|
||||||
|
"provider": "openai/gpt-4o-mini",
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/llm-complete",
|
||||||
|
"webhook_data_in_payload": true,
|
||||||
|
"webhook_headers": {
|
||||||
|
"X-Webhook-Secret": "your-secret-token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Response: {"task_id": "llm_1234567890"}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Your webhook receives:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "llm_1234567890",
|
||||||
|
"task_type": "llm_extraction",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-22T12:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com/article"],
|
||||||
|
"data": {
|
||||||
|
"extracted_content": {
|
||||||
|
"title": "Understanding Web Scraping",
|
||||||
|
"author": "John Doe",
|
||||||
|
"main_points": ["Point 1", "Point 2", "Point 3"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key Differences for LLM Jobs:**
|
||||||
|
- Task type is `"llm_extraction"` instead of `"crawl"`
|
||||||
|
- Extracted data is in `data.extracted_content`
|
||||||
|
- The request takes a single `url` (not a `urls` array); the webhook payload still reports it as a one-element `urls` list
|
||||||
|
- Supports schema-based extraction with `schema` parameter
|
||||||
|
|
||||||
|
> 💡 **Pro tip**: See [WEBHOOK_EXAMPLES.md](./WEBHOOK_EXAMPLES.md) for detailed examples including TypeScript client code, Flask webhook handlers, and failure handling.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Metrics & Monitoring
|
## Metrics & Monitoring
|
||||||
@@ -826,10 +1013,11 @@ We're here to help you succeed with Crawl4AI! Here's how to get support:
|
|||||||
|
|
||||||
In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
|
In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
|
||||||
- Building and running the Docker container
|
- Building and running the Docker container
|
||||||
- Configuring the environment
|
- Configuring the environment
|
||||||
- Using the interactive playground for testing
|
- Using the interactive playground for testing
|
||||||
- Making API requests with proper typing
|
- Making API requests with proper typing
|
||||||
- Using the Python SDK
|
- Using the Python SDK
|
||||||
|
- Asynchronous job queues with webhook notifications
|
||||||
- Leveraging specialized endpoints for screenshots, PDFs, and JavaScript execution
|
- Leveraging specialized endpoints for screenshots, PDFs, and JavaScript execution
|
||||||
- Connecting via the Model Context Protocol (MCP)
|
- Connecting via the Model Context Protocol (MCP)
|
||||||
- Monitoring your deployment
|
- Monitoring your deployment
|
||||||
|
|||||||
378
deploy/docker/WEBHOOK_EXAMPLES.md
Normal file
378
deploy/docker/WEBHOOK_EXAMPLES.md
Normal file
@@ -0,0 +1,378 @@
|
|||||||
|
# Webhook Feature Examples
|
||||||
|
|
||||||
|
This document provides examples of how to use the webhook feature for crawl jobs in Crawl4AI.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The webhook feature allows you to receive notifications when crawl jobs complete, eliminating the need for polling. Webhooks are sent with exponential backoff retry logic to ensure reliable delivery.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Global Configuration (config.yml)
|
||||||
|
|
||||||
|
You can configure default webhook settings in `config.yml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
webhooks:
|
||||||
|
enabled: true
|
||||||
|
default_url: null # Optional: default webhook URL for all jobs
|
||||||
|
data_in_payload: false # Optional: default behavior for including data
|
||||||
|
retry:
|
||||||
|
max_attempts: 5
|
||||||
|
initial_delay_ms: 1000 # 1s, 2s, 4s, 8s, 16s exponential backoff
|
||||||
|
max_delay_ms: 32000
|
||||||
|
timeout_ms: 30000 # 30s timeout per webhook call
|
||||||
|
headers: # Optional: default headers to include
|
||||||
|
User-Agent: "Crawl4AI-Webhook/1.0"
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Usage Examples
|
||||||
|
|
||||||
|
### Example 1: Basic Webhook (Notification Only)
|
||||||
|
|
||||||
|
Send a webhook notification without including the crawl data in the payload.
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": false
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_a1b2c3d4"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Webhook Payload Received:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_a1b2c3d4",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-21T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Your webhook handler should then fetch the results:
|
||||||
|
```bash
|
||||||
|
curl http://localhost:11235/crawl/job/crawl_a1b2c3d4
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 2: Webhook with Data Included
|
||||||
|
|
||||||
|
Include the full crawl results in the webhook payload.
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": true
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Webhook Payload Received:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_a1b2c3d4",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-21T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"data": {
|
||||||
|
"markdown": "...",
|
||||||
|
"html": "...",
|
||||||
|
"links": {...},
|
||||||
|
"metadata": {...}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 3: Webhook with Custom Headers
|
||||||
|
|
||||||
|
Include custom headers for authentication or identification.
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": false,
|
||||||
|
"webhook_headers": {
|
||||||
|
"X-Webhook-Secret": "my-secret-token",
|
||||||
|
"X-Service-ID": "crawl4ai-production"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The webhook will be sent with these additional headers plus the default headers from config.
|
||||||
|
|
||||||
|
### Example 4: Failure Notification
|
||||||
|
|
||||||
|
When a crawl job fails, a webhook is sent with error details.
|
||||||
|
|
||||||
|
**Webhook Payload on Failure:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_a1b2c3d4",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "failed",
|
||||||
|
"timestamp": "2025-10-21T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"error": "Connection timeout after 30s"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 5: Using Global Default Webhook
|
||||||
|
|
||||||
|
If you set a `default_url` in config.yml, jobs without webhook_config will use it:
|
||||||
|
|
||||||
|
**config.yml:**
|
||||||
|
```yaml
|
||||||
|
webhooks:
|
||||||
|
enabled: true
|
||||||
|
default_url: "https://myapp.com/webhooks/default"
|
||||||
|
data_in_payload: false
|
||||||
|
```
|
||||||
|
|
||||||
|
**Request (no webhook_config needed):**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The webhook will be sent to the default URL configured in config.yml.
|
||||||
|
|
||||||
|
### Example 6: LLM Extraction Job with Webhook
|
||||||
|
|
||||||
|
Use webhooks with the LLM extraction endpoint for asynchronous processing.
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/llm/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/article",
|
||||||
|
"q": "Extract the article title, author, and publication date",
|
||||||
|
"schema": "{\"type\": \"object\", \"properties\": {\"title\": {\"type\": \"string\"}, \"author\": {\"type\": \"string\"}, \"date\": {\"type\": \"string\"}}}",
|
||||||
|
"cache": false,
|
||||||
|
"provider": "openai/gpt-4o-mini",
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/llm-complete",
|
||||||
|
"webhook_data_in_payload": true
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "llm_1698765432_12345"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Webhook Payload Received:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "llm_1698765432_12345",
|
||||||
|
"task_type": "llm_extraction",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-21T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com/article"],
|
||||||
|
"data": {
|
||||||
|
"extracted_content": {
|
||||||
|
"title": "Understanding Web Scraping",
|
||||||
|
"author": "John Doe",
|
||||||
|
"date": "2025-10-21"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Webhook Handler Example
|
||||||
|
|
||||||
|
Here's a simple Python Flask webhook handler that supports both crawl and LLM extraction jobs:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
import requests
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/webhooks/crawl-complete', methods=['POST'])
|
||||||
|
def handle_crawl_webhook():
|
||||||
|
payload = request.json
|
||||||
|
|
||||||
|
task_id = payload['task_id']
|
||||||
|
task_type = payload['task_type']
|
||||||
|
status = payload['status']
|
||||||
|
|
||||||
|
if status == 'completed':
|
||||||
|
# If data not in payload, fetch it
|
||||||
|
if 'data' not in payload:
|
||||||
|
# Determine endpoint based on task type
|
||||||
|
endpoint = 'crawl' if task_type == 'crawl' else 'llm'
|
||||||
|
response = requests.get(f'http://localhost:11235/{endpoint}/job/{task_id}')
|
||||||
|
data = response.json()
|
||||||
|
else:
|
||||||
|
data = payload['data']
|
||||||
|
|
||||||
|
# Process based on task type
|
||||||
|
if task_type == 'crawl':
|
||||||
|
print(f"Processing crawl results for {task_id}")
|
||||||
|
# Handle crawl results
|
||||||
|
results = data.get('results', [])
|
||||||
|
for result in results:
|
||||||
|
print(f" - {result.get('url')}: {len(result.get('markdown', ''))} chars")
|
||||||
|
|
||||||
|
elif task_type == 'llm_extraction':
|
||||||
|
print(f"Processing LLM extraction for {task_id}")
|
||||||
|
# Handle LLM extraction
|
||||||
|
# Note: Webhook sends 'extracted_content', API returns 'result'
|
||||||
|
extracted = data.get('extracted_content', data.get('result', {}))
|
||||||
|
print(f" - Extracted: {extracted}")
|
||||||
|
|
||||||
|
# Your business logic here...
|
||||||
|
|
||||||
|
elif status == 'failed':
|
||||||
|
error = payload.get('error', 'Unknown error')
|
||||||
|
print(f"{task_type} job {task_id} failed: {error}")
|
||||||
|
# Handle failure...
|
||||||
|
|
||||||
|
return jsonify({"status": "received"}), 200
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(port=8080)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Retry Logic
|
||||||
|
|
||||||
|
The webhook delivery service uses exponential backoff retry logic:
|
||||||
|
|
||||||
|
- **Attempts:** Up to 5 attempts by default
|
||||||
|
- **Delays:** 1s → 2s → 4s → 8s → 16s
|
||||||
|
- **Timeout:** 30 seconds per attempt
|
||||||
|
- **Retry Conditions:**
|
||||||
|
- Server errors (5xx status codes)
|
||||||
|
- Network errors
|
||||||
|
- Timeouts
|
||||||
|
- **No Retry:**
|
||||||
|
- Client errors (4xx status codes)
|
||||||
|
- Successful delivery (2xx status codes)
|
||||||
|
|
||||||
|
## Benefits
|
||||||
|
|
||||||
|
1. **No Polling Required** - Eliminates constant API calls to check job status
|
||||||
|
2. **Real-time Notifications** - Immediate notification when jobs complete
|
||||||
|
3. **Reliable Delivery** - Automatic retries with exponential backoff make delivery resilient to transient failures
|
||||||
|
4. **Flexible** - Choose between notification-only or full data delivery
|
||||||
|
5. **Secure** - Supports custom headers for authentication
|
||||||
|
6. **Configurable** - Global defaults or per-job configuration
|
||||||
|
7. **Universal Support** - Works with both `/crawl/job` and `/llm/job` endpoints
|
||||||
|
|
||||||
|
## TypeScript Client Example
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface WebhookConfig {
|
||||||
|
webhook_url: string;
|
||||||
|
webhook_data_in_payload?: boolean;
|
||||||
|
webhook_headers?: Record<string, string>;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface CrawlJobRequest {
|
||||||
|
urls: string[];
|
||||||
|
browser_config?: Record<string, any>;
|
||||||
|
crawler_config?: Record<string, any>;
|
||||||
|
webhook_config?: WebhookConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface LLMJobRequest {
|
||||||
|
url: string;
|
||||||
|
q: string;
|
||||||
|
schema?: string;
|
||||||
|
cache?: boolean;
|
||||||
|
provider?: string;
|
||||||
|
webhook_config?: WebhookConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createCrawlJob(request: CrawlJobRequest) {
|
||||||
|
const response = await fetch('http://localhost:11235/crawl/job', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(request)
|
||||||
|
});
|
||||||
|
|
||||||
|
const { task_id } = await response.json();
|
||||||
|
return task_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createLLMJob(request: LLMJobRequest) {
|
||||||
|
const response = await fetch('http://localhost:11235/llm/job', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(request)
|
||||||
|
});
|
||||||
|
|
||||||
|
const { task_id } = await response.json();
|
||||||
|
return task_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Usage - Crawl Job
|
||||||
|
const crawlTaskId = await createCrawlJob({
|
||||||
|
urls: ['https://example.com'],
|
||||||
|
webhook_config: {
|
||||||
|
webhook_url: 'https://myapp.com/webhooks/crawl-complete',
|
||||||
|
webhook_data_in_payload: false,
|
||||||
|
webhook_headers: {
|
||||||
|
'X-Webhook-Secret': 'my-secret'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Usage - LLM Extraction Job
|
||||||
|
const llmTaskId = await createLLMJob({
|
||||||
|
url: 'https://example.com/article',
|
||||||
|
q: 'Extract the main points from this article',
|
||||||
|
provider: 'openai/gpt-4o-mini',
|
||||||
|
webhook_config: {
|
||||||
|
webhook_url: 'https://myapp.com/webhooks/llm-complete',
|
||||||
|
webhook_data_in_payload: true,
|
||||||
|
webhook_headers: {
|
||||||
|
'X-Webhook-Secret': 'my-secret'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring and Debugging
|
||||||
|
|
||||||
|
Webhook delivery attempts are logged at INFO level:
|
||||||
|
- Successful deliveries
|
||||||
|
- Retry attempts with delays
|
||||||
|
- Final failures after max attempts
|
||||||
|
|
||||||
|
Check the application logs for webhook delivery status:
|
||||||
|
```bash
|
||||||
|
docker logs crawl4ai-container | grep -i webhook
|
||||||
|
```
|
||||||
@@ -46,6 +46,7 @@ from utils import (
|
|||||||
get_llm_temperature,
|
get_llm_temperature,
|
||||||
get_llm_base_url
|
get_llm_base_url
|
||||||
)
|
)
|
||||||
|
from webhook import WebhookDeliveryService
|
||||||
|
|
||||||
import psutil, time
|
import psutil, time
|
||||||
|
|
||||||
@@ -120,10 +121,14 @@ async def process_llm_extraction(
|
|||||||
schema: Optional[str] = None,
|
schema: Optional[str] = None,
|
||||||
cache: str = "0",
|
cache: str = "0",
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
|
webhook_config: Optional[Dict] = None,
|
||||||
temperature: Optional[float] = None,
|
temperature: Optional[float] = None,
|
||||||
base_url: Optional[str] = None
|
base_url: Optional[str] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Process LLM extraction in background."""
|
"""Process LLM extraction in background."""
|
||||||
|
# Initialize webhook service
|
||||||
|
webhook_service = WebhookDeliveryService(config)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Validate provider
|
# Validate provider
|
||||||
is_valid, error_msg = validate_llm_provider(config, provider)
|
is_valid, error_msg = validate_llm_provider(config, provider)
|
||||||
@@ -132,6 +137,16 @@ async def process_llm_extraction(
|
|||||||
"status": TaskStatus.FAILED,
|
"status": TaskStatus.FAILED,
|
||||||
"error": error_msg
|
"error": error_msg
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Send webhook notification on failure
|
||||||
|
await webhook_service.notify_job_completion(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="llm_extraction",
|
||||||
|
status="failed",
|
||||||
|
urls=[url],
|
||||||
|
webhook_config=webhook_config,
|
||||||
|
error=error_msg
|
||||||
|
)
|
||||||
return
|
return
|
||||||
api_key = get_llm_api_key(config, provider) # Returns None to let litellm handle it
|
api_key = get_llm_api_key(config, provider) # Returns None to let litellm handle it
|
||||||
llm_strategy = LLMExtractionStrategy(
|
llm_strategy = LLMExtractionStrategy(
|
||||||
@@ -162,17 +177,40 @@ async def process_llm_extraction(
|
|||||||
"status": TaskStatus.FAILED,
|
"status": TaskStatus.FAILED,
|
||||||
"error": result.error_message
|
"error": result.error_message
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Send webhook notification on failure
|
||||||
|
await webhook_service.notify_job_completion(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="llm_extraction",
|
||||||
|
status="failed",
|
||||||
|
urls=[url],
|
||||||
|
webhook_config=webhook_config,
|
||||||
|
error=result.error_message
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
content = json.loads(result.extracted_content)
|
content = json.loads(result.extracted_content)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
content = result.extracted_content
|
content = result.extracted_content
|
||||||
|
|
||||||
|
result_data = {"extracted_content": content}
|
||||||
|
|
||||||
await redis.hset(f"task:{task_id}", mapping={
|
await redis.hset(f"task:{task_id}", mapping={
|
||||||
"status": TaskStatus.COMPLETED,
|
"status": TaskStatus.COMPLETED,
|
||||||
"result": json.dumps(content)
|
"result": json.dumps(content)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Send webhook notification on successful completion
|
||||||
|
await webhook_service.notify_job_completion(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="llm_extraction",
|
||||||
|
status="completed",
|
||||||
|
urls=[url],
|
||||||
|
webhook_config=webhook_config,
|
||||||
|
result=result_data
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LLM extraction error: {str(e)}", exc_info=True)
|
logger.error(f"LLM extraction error: {str(e)}", exc_info=True)
|
||||||
await redis.hset(f"task:{task_id}", mapping={
|
await redis.hset(f"task:{task_id}", mapping={
|
||||||
@@ -180,6 +218,16 @@ async def process_llm_extraction(
|
|||||||
"error": str(e)
|
"error": str(e)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Send webhook notification on failure
|
||||||
|
await webhook_service.notify_job_completion(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="llm_extraction",
|
||||||
|
status="failed",
|
||||||
|
urls=[url],
|
||||||
|
webhook_config=webhook_config,
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
async def handle_markdown_request(
|
async def handle_markdown_request(
|
||||||
url: str,
|
url: str,
|
||||||
filter_type: FilterType,
|
filter_type: FilterType,
|
||||||
@@ -261,6 +309,7 @@ async def handle_llm_request(
|
|||||||
cache: str = "0",
|
cache: str = "0",
|
||||||
config: Optional[dict] = None,
|
config: Optional[dict] = None,
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
|
webhook_config: Optional[Dict] = None,
|
||||||
temperature: Optional[float] = None,
|
temperature: Optional[float] = None,
|
||||||
api_base_url: Optional[str] = None
|
api_base_url: Optional[str] = None
|
||||||
) -> JSONResponse:
|
) -> JSONResponse:
|
||||||
@@ -294,6 +343,7 @@ async def handle_llm_request(
|
|||||||
base_url,
|
base_url,
|
||||||
config,
|
config,
|
||||||
provider,
|
provider,
|
||||||
|
webhook_config,
|
||||||
temperature,
|
temperature,
|
||||||
api_base_url
|
api_base_url
|
||||||
)
|
)
|
||||||
@@ -341,6 +391,7 @@ async def create_new_task(
|
|||||||
base_url: str,
|
base_url: str,
|
||||||
config: dict,
|
config: dict,
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
|
webhook_config: Optional[Dict] = None,
|
||||||
temperature: Optional[float] = None,
|
temperature: Optional[float] = None,
|
||||||
api_base_url: Optional[str] = None
|
api_base_url: Optional[str] = None
|
||||||
) -> JSONResponse:
|
) -> JSONResponse:
|
||||||
@@ -351,12 +402,18 @@ async def create_new_task(
|
|||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
task_id = f"llm_{int(datetime.now().timestamp())}_{id(background_tasks)}"
|
task_id = f"llm_{int(datetime.now().timestamp())}_{id(background_tasks)}"
|
||||||
|
|
||||||
await redis.hset(f"task:{task_id}", mapping={
|
task_data = {
|
||||||
"status": TaskStatus.PROCESSING,
|
"status": TaskStatus.PROCESSING,
|
||||||
"created_at": datetime.now().isoformat(),
|
"created_at": datetime.now().isoformat(),
|
||||||
"url": decoded_url
|
"url": decoded_url
|
||||||
})
|
}
|
||||||
|
|
||||||
|
# Store webhook config if provided
|
||||||
|
if webhook_config:
|
||||||
|
task_data["webhook_config"] = json.dumps(webhook_config)
|
||||||
|
|
||||||
|
await redis.hset(f"task:{task_id}", mapping=task_data)
|
||||||
|
|
||||||
background_tasks.add_task(
|
background_tasks.add_task(
|
||||||
process_llm_extraction,
|
process_llm_extraction,
|
||||||
@@ -368,6 +425,7 @@ async def create_new_task(
|
|||||||
schema,
|
schema,
|
||||||
cache,
|
cache,
|
||||||
provider,
|
provider,
|
||||||
|
webhook_config,
|
||||||
temperature,
|
temperature,
|
||||||
api_base_url
|
api_base_url
|
||||||
)
|
)
|
||||||
@@ -442,13 +500,15 @@ async def handle_crawl_request(
|
|||||||
urls: List[str],
|
urls: List[str],
|
||||||
browser_config: dict,
|
browser_config: dict,
|
||||||
crawler_config: dict,
|
crawler_config: dict,
|
||||||
config: dict
|
config: dict,
|
||||||
|
hooks_config: Optional[dict] = None
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Handle non-streaming crawl requests."""
|
"""Handle non-streaming crawl requests with optional hooks."""
|
||||||
start_mem_mb = _get_memory_mb() # <--- Get memory before
|
start_mem_mb = _get_memory_mb() # <--- Get memory before
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
mem_delta_mb = None
|
mem_delta_mb = None
|
||||||
peak_mem_mb = start_mem_mb
|
peak_mem_mb = start_mem_mb
|
||||||
|
hook_manager = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
urls = [('https://' + url) if not url.startswith(('http://', 'https://')) and not url.startswith(("raw:", "raw://")) else url for url in urls]
|
urls = [('https://' + url) if not url.startswith(('http://', 'https://')) and not url.startswith(("raw:", "raw://")) else url for url in urls]
|
||||||
@@ -468,6 +528,19 @@ async def handle_crawl_request(
|
|||||||
# crawler: AsyncWebCrawler = AsyncWebCrawler(config=browser_config)
|
# crawler: AsyncWebCrawler = AsyncWebCrawler(config=browser_config)
|
||||||
# await crawler.start()
|
# await crawler.start()
|
||||||
|
|
||||||
|
# Attach hooks if provided
|
||||||
|
hooks_status = {}
|
||||||
|
if hooks_config:
|
||||||
|
from hook_manager import attach_user_hooks_to_crawler, UserHookManager
|
||||||
|
hook_manager = UserHookManager(timeout=hooks_config.get('timeout', 30))
|
||||||
|
hooks_status, hook_manager = await attach_user_hooks_to_crawler(
|
||||||
|
crawler,
|
||||||
|
hooks_config.get('code', {}),
|
||||||
|
timeout=hooks_config.get('timeout', 30),
|
||||||
|
hook_manager=hook_manager
|
||||||
|
)
|
||||||
|
logger.info(f"Hooks attachment status: {hooks_status['status']}")
|
||||||
|
|
||||||
base_config = config["crawler"]["base_config"]
|
base_config = config["crawler"]["base_config"]
|
||||||
# Iterate on key-value pairs in global_config then use hasattr to set them
|
# Iterate on key-value pairs in global_config then use hasattr to set them
|
||||||
for key, value in base_config.items():
|
for key, value in base_config.items():
|
||||||
@@ -484,6 +557,10 @@ async def handle_crawl_request(
|
|||||||
config=crawler_config,
|
config=crawler_config,
|
||||||
dispatcher=dispatcher)
|
dispatcher=dispatcher)
|
||||||
results = await partial_func()
|
results = await partial_func()
|
||||||
|
|
||||||
|
# Ensure results is always a list
|
||||||
|
if not isinstance(results, list):
|
||||||
|
results = [results]
|
||||||
|
|
||||||
# await crawler.close()
|
# await crawler.close()
|
||||||
|
|
||||||
@@ -498,22 +575,71 @@ async def handle_crawl_request(
|
|||||||
# Process results to handle PDF bytes
|
# Process results to handle PDF bytes
|
||||||
processed_results = []
|
processed_results = []
|
||||||
for result in results:
|
for result in results:
|
||||||
result_dict = result.model_dump()
|
try:
|
||||||
# if fit_html is not a string, set it to None to avoid serialization errors
|
# Check if result has model_dump method (is a proper CrawlResult)
|
||||||
if "fit_html" in result_dict and not (result_dict["fit_html"] is None or isinstance(result_dict["fit_html"], str)):
|
if hasattr(result, 'model_dump'):
|
||||||
result_dict["fit_html"] = None
|
result_dict = result.model_dump()
|
||||||
# If PDF exists, encode it to base64
|
elif isinstance(result, dict):
|
||||||
if result_dict.get('pdf') is not None:
|
result_dict = result
|
||||||
result_dict['pdf'] = b64encode(result_dict['pdf']).decode('utf-8')
|
else:
|
||||||
processed_results.append(result_dict)
|
# Handle unexpected result type
|
||||||
|
logger.warning(f"Unexpected result type: {type(result)}")
|
||||||
|
result_dict = {
|
||||||
|
"url": str(result) if hasattr(result, '__str__') else "unknown",
|
||||||
|
"success": False,
|
||||||
|
"error_message": f"Unexpected result type: {type(result).__name__}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# if fit_html is not a string, set it to None to avoid serialization errors
|
||||||
|
if "fit_html" in result_dict and not (result_dict["fit_html"] is None or isinstance(result_dict["fit_html"], str)):
|
||||||
|
result_dict["fit_html"] = None
|
||||||
|
|
||||||
|
# If PDF exists, encode it to base64
|
||||||
|
if result_dict.get('pdf') is not None and isinstance(result_dict.get('pdf'), bytes):
|
||||||
|
result_dict['pdf'] = b64encode(result_dict['pdf']).decode('utf-8')
|
||||||
|
|
||||||
|
processed_results.append(result_dict)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing result: {e}")
|
||||||
|
processed_results.append({
|
||||||
|
"url": "unknown",
|
||||||
|
"success": False,
|
||||||
|
"error_message": str(e)
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
response = {
|
||||||
"success": True,
|
"success": True,
|
||||||
"results": processed_results,
|
"results": processed_results,
|
||||||
"server_processing_time_s": end_time - start_time,
|
"server_processing_time_s": end_time - start_time,
|
||||||
"server_memory_delta_mb": mem_delta_mb,
|
"server_memory_delta_mb": mem_delta_mb,
|
||||||
"server_peak_memory_mb": peak_mem_mb
|
"server_peak_memory_mb": peak_mem_mb
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add hooks information if hooks were used
|
||||||
|
if hooks_config and hook_manager:
|
||||||
|
from hook_manager import UserHookManager
|
||||||
|
if isinstance(hook_manager, UserHookManager):
|
||||||
|
try:
|
||||||
|
# Ensure all hook data is JSON serializable
|
||||||
|
hook_data = {
|
||||||
|
"status": hooks_status,
|
||||||
|
"execution_log": hook_manager.execution_log,
|
||||||
|
"errors": hook_manager.errors,
|
||||||
|
"summary": hook_manager.get_summary()
|
||||||
|
}
|
||||||
|
# Test that it's serializable
|
||||||
|
json.dumps(hook_data)
|
||||||
|
response["hooks"] = hook_data
|
||||||
|
except (TypeError, ValueError) as e:
|
||||||
|
logger.error(f"Hook data not JSON serializable: {e}")
|
||||||
|
response["hooks"] = {
|
||||||
|
"status": {"status": "error", "message": "Hook data serialization failed"},
|
||||||
|
"execution_log": [],
|
||||||
|
"errors": [{"error": str(e)}],
|
||||||
|
"summary": {}
|
||||||
|
}
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Crawl error: {str(e)}", exc_info=True)
|
logger.error(f"Crawl error: {str(e)}", exc_info=True)
|
||||||
@@ -542,9 +668,11 @@ async def handle_stream_crawl_request(
|
|||||||
urls: List[str],
|
urls: List[str],
|
||||||
browser_config: dict,
|
browser_config: dict,
|
||||||
crawler_config: dict,
|
crawler_config: dict,
|
||||||
config: dict
|
config: dict,
|
||||||
) -> Tuple[AsyncWebCrawler, AsyncGenerator]:
|
hooks_config: Optional[dict] = None
|
||||||
"""Handle streaming crawl requests."""
|
) -> Tuple[AsyncWebCrawler, AsyncGenerator, Optional[Dict]]:
|
||||||
|
"""Handle streaming crawl requests with optional hooks."""
|
||||||
|
hooks_info = None
|
||||||
try:
|
try:
|
||||||
browser_config = BrowserConfig.load(browser_config)
|
browser_config = BrowserConfig.load(browser_config)
|
||||||
# browser_config.verbose = True # Set to False or remove for production stress testing
|
# browser_config.verbose = True # Set to False or remove for production stress testing
|
||||||
@@ -565,6 +693,20 @@ async def handle_stream_crawl_request(
|
|||||||
|
|
||||||
# crawler = AsyncWebCrawler(config=browser_config)
|
# crawler = AsyncWebCrawler(config=browser_config)
|
||||||
# await crawler.start()
|
# await crawler.start()
|
||||||
|
|
||||||
|
# Attach hooks if provided
|
||||||
|
if hooks_config:
|
||||||
|
from hook_manager import attach_user_hooks_to_crawler, UserHookManager
|
||||||
|
hook_manager = UserHookManager(timeout=hooks_config.get('timeout', 30))
|
||||||
|
hooks_status, hook_manager = await attach_user_hooks_to_crawler(
|
||||||
|
crawler,
|
||||||
|
hooks_config.get('code', {}),
|
||||||
|
timeout=hooks_config.get('timeout', 30),
|
||||||
|
hook_manager=hook_manager
|
||||||
|
)
|
||||||
|
logger.info(f"Hooks attachment status for streaming: {hooks_status['status']}")
|
||||||
|
# Include hook manager in hooks_info for proper tracking
|
||||||
|
hooks_info = {'status': hooks_status, 'manager': hook_manager}
|
||||||
|
|
||||||
results_gen = await crawler.arun_many(
|
results_gen = await crawler.arun_many(
|
||||||
urls=urls,
|
urls=urls,
|
||||||
@@ -572,7 +714,7 @@ async def handle_stream_crawl_request(
|
|||||||
dispatcher=dispatcher
|
dispatcher=dispatcher
|
||||||
)
|
)
|
||||||
|
|
||||||
return crawler, results_gen
|
return crawler, results_gen, hooks_info
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Make sure to close crawler if started during an error here
|
# Make sure to close crawler if started during an error here
|
||||||
@@ -596,6 +738,7 @@ async def handle_crawl_job(
|
|||||||
browser_config: Dict,
|
browser_config: Dict,
|
||||||
crawler_config: Dict,
|
crawler_config: Dict,
|
||||||
config: Dict,
|
config: Dict,
|
||||||
|
webhook_config: Optional[Dict] = None,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
Fire-and-forget version of handle_crawl_request.
|
Fire-and-forget version of handle_crawl_request.
|
||||||
@@ -603,13 +746,24 @@ async def handle_crawl_job(
|
|||||||
lets /crawl/job/{task_id} polling fetch the result.
|
lets /crawl/job/{task_id} polling fetch the result.
|
||||||
"""
|
"""
|
||||||
task_id = f"crawl_{uuid4().hex[:8]}"
|
task_id = f"crawl_{uuid4().hex[:8]}"
|
||||||
await redis.hset(f"task:{task_id}", mapping={
|
|
||||||
|
# Store task data in Redis
|
||||||
|
task_data = {
|
||||||
"status": TaskStatus.PROCESSING, # <-- keep enum values consistent
|
"status": TaskStatus.PROCESSING, # <-- keep enum values consistent
|
||||||
"created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
|
"created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
|
||||||
"url": json.dumps(urls), # store list as JSON string
|
"url": json.dumps(urls), # store list as JSON string
|
||||||
"result": "",
|
"result": "",
|
||||||
"error": "",
|
"error": "",
|
||||||
})
|
}
|
||||||
|
|
||||||
|
# Store webhook config if provided
|
||||||
|
if webhook_config:
|
||||||
|
task_data["webhook_config"] = json.dumps(webhook_config)
|
||||||
|
|
||||||
|
await redis.hset(f"task:{task_id}", mapping=task_data)
|
||||||
|
|
||||||
|
# Initialize webhook service
|
||||||
|
webhook_service = WebhookDeliveryService(config)
|
||||||
|
|
||||||
async def _runner():
|
async def _runner():
|
||||||
try:
|
try:
|
||||||
@@ -623,6 +777,17 @@ async def handle_crawl_job(
|
|||||||
"status": TaskStatus.COMPLETED,
|
"status": TaskStatus.COMPLETED,
|
||||||
"result": json.dumps(result),
|
"result": json.dumps(result),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Send webhook notification on successful completion
|
||||||
|
await webhook_service.notify_job_completion(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="crawl",
|
||||||
|
status="completed",
|
||||||
|
urls=urls,
|
||||||
|
webhook_config=webhook_config,
|
||||||
|
result=result
|
||||||
|
)
|
||||||
|
|
||||||
await asyncio.sleep(5) # Give Redis time to process the update
|
await asyncio.sleep(5) # Give Redis time to process the update
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
await redis.hset(f"task:{task_id}", mapping={
|
await redis.hset(f"task:{task_id}", mapping={
|
||||||
@@ -630,5 +795,15 @@ async def handle_crawl_job(
|
|||||||
"error": str(exc),
|
"error": str(exc),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Send webhook notification on failure
|
||||||
|
await webhook_service.notify_job_completion(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="crawl",
|
||||||
|
status="failed",
|
||||||
|
urls=urls,
|
||||||
|
webhook_config=webhook_config,
|
||||||
|
error=str(exc)
|
||||||
|
)
|
||||||
|
|
||||||
background_tasks.add_task(_runner)
|
background_tasks.add_task(_runner)
|
||||||
return {"task_id": task_id}
|
return {"task_id": task_id}
|
||||||
@@ -87,4 +87,17 @@ observability:
|
|||||||
enabled: True
|
enabled: True
|
||||||
endpoint: "/metrics"
|
endpoint: "/metrics"
|
||||||
health_check:
|
health_check:
|
||||||
endpoint: "/health"
|
endpoint: "/health"
|
||||||
|
|
||||||
|
# Webhook Configuration
|
||||||
|
webhooks:
|
||||||
|
enabled: true
|
||||||
|
default_url: null # Optional: default webhook URL for all jobs
|
||||||
|
data_in_payload: false # Optional: default behavior for including data
|
||||||
|
retry:
|
||||||
|
max_attempts: 5
|
||||||
|
initial_delay_ms: 1000 # 1s, 2s, 4s, 8s, 16s exponential backoff
|
||||||
|
max_delay_ms: 32000
|
||||||
|
timeout_ms: 30000 # 30s timeout per webhook call
|
||||||
|
headers: # Optional: default headers to include
|
||||||
|
User-Agent: "Crawl4AI-Webhook/1.0"
|
||||||
@@ -2,8 +2,8 @@
|
|||||||
import asyncio, json, hashlib, time, psutil
|
import asyncio, json, hashlib, time, psutil
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
from crawl4ai import AsyncWebCrawler, BrowserConfig, BrowserAdapter
|
||||||
from typing import Dict
|
from typing import Dict ,Optional
|
||||||
from utils import load_config
|
from utils import load_config
|
||||||
|
|
||||||
CONFIG = load_config()
|
CONFIG = load_config()
|
||||||
@@ -15,11 +15,22 @@ LOCK = asyncio.Lock()
|
|||||||
MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0) # % RAM – refuse new browsers above this
|
MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0) # % RAM – refuse new browsers above this
|
||||||
IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800) # close if unused for 30 min
|
IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800) # close if unused for 30 min
|
||||||
|
|
||||||
def _sig(cfg: BrowserConfig) -> str:
|
|
||||||
payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",",":"))
|
def _sig(cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None) -> str:
|
||||||
|
try:
|
||||||
|
config_payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",", ":"))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
# Fallback to string representation if JSON serialization fails
|
||||||
|
config_payload = str(cfg.to_dict())
|
||||||
|
adapter_name = adapter.__class__.__name__ if adapter else "PlaywrightAdapter"
|
||||||
|
payload = f"{config_payload}:{adapter_name}"
|
||||||
return hashlib.sha1(payload.encode()).hexdigest()
|
return hashlib.sha1(payload.encode()).hexdigest()
|
||||||
|
|
||||||
async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
|
|
||||||
|
async def get_crawler(
|
||||||
|
cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None
|
||||||
|
) -> AsyncWebCrawler:
|
||||||
|
sig = None
|
||||||
try:
|
try:
|
||||||
sig = _sig(cfg)
|
sig = _sig(cfg)
|
||||||
async with LOCK:
|
async with LOCK:
|
||||||
@@ -37,12 +48,13 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Failed to start browser: {e}")
|
raise RuntimeError(f"Failed to start browser: {e}")
|
||||||
finally:
|
finally:
|
||||||
if sig in POOL:
|
if sig:
|
||||||
LAST_USED[sig] = time.time()
|
if sig in POOL:
|
||||||
else:
|
LAST_USED[sig] = time.time()
|
||||||
# If we failed to start the browser, we should remove it from the pool
|
else:
|
||||||
POOL.pop(sig, None)
|
# If we failed to start the browser, we should remove it from the pool
|
||||||
LAST_USED.pop(sig, None)
|
POOL.pop(sig, None)
|
||||||
|
LAST_USED.pop(sig, None)
|
||||||
# If we failed to start the browser, we should remove it from the pool
|
# If we failed to start the browser, we should remove it from the pool
|
||||||
async def close_all():
|
async def close_all():
|
||||||
async with LOCK:
|
async with LOCK:
|
||||||
|
|||||||
512
deploy/docker/hook_manager.py
Normal file
512
deploy/docker/hook_manager.py
Normal file
@@ -0,0 +1,512 @@
|
|||||||
|
"""
|
||||||
|
Hook Manager for User-Provided Hook Functions
|
||||||
|
Handles validation, compilation, and safe execution of user-provided hook code
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import asyncio
|
||||||
|
import traceback
|
||||||
|
from typing import Dict, Callable, Optional, Tuple, List, Any
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class UserHookManager:
    """Validate, compile and run user-provided hook functions with error isolation.

    Every failure (compilation error, timeout, exception raised by a hook) is
    recorded in ``self.errors``; every execution attempt is recorded in
    ``self.execution_log`` so callers can report what happened.
    """

    # Parameter names each hook point is expected to accept.
    HOOK_SIGNATURES = {
        "on_browser_created": ["browser"],
        "on_page_context_created": ["page", "context"],
        "before_goto": ["page", "context", "url"],
        "after_goto": ["page", "context", "url", "response"],
        "on_user_agent_updated": ["page", "context", "user_agent"],
        "on_execution_started": ["page", "context"],
        "before_retrieve_html": ["page", "context"],
        "before_return_html": ["page", "context", "html"]
    }

    # Default timeout for hook execution (in seconds)
    DEFAULT_TIMEOUT = 30

    def __init__(self, timeout: int = DEFAULT_TIMEOUT):
        """Create a manager.

        Args:
            timeout: Maximum number of seconds a single hook execution may take.
        """
        self.timeout = timeout
        self.errors: List[Dict[str, Any]] = []
        self.compiled_hooks: Dict[str, Callable] = {}
        self.execution_log: List[Dict[str, Any]] = []

    @staticmethod
    def _has_own_return(func_def) -> bool:
        """Return True if *func_def* itself contains a ``return`` statement.

        Nested functions, lambdas and classes are not descended into: a
        ``return`` inside them does not make the hook return anything.
        """
        pending = list(func_def.body)
        while pending:
            node = pending.pop()
            if isinstance(node, ast.Return):
                return True
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda, ast.ClassDef)):
                continue
            pending.extend(ast.iter_child_nodes(node))
        return False

    def validate_hook_structure(self, hook_code: str, hook_point: str) -> Tuple[bool, str]:
        """
        Validate the structure of user-provided hook code

        The code is only parsed, never executed. The first top-level function
        definition is treated as the hook.

        Args:
            hook_code: The Python code string containing the hook function
            hook_point: The hook point name (e.g., 'on_page_context_created')

        Returns:
            Tuple of (is_valid, error_message); the message is "Valid" on success
        """
        try:
            tree = ast.parse(hook_code)

            if not tree.body:
                return False, "Hook code is empty"

            # The first top-level function definition is the hook.
            func_def = next(
                (
                    node for node in tree.body
                    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
                ),
                None,
            )
            if func_def is None:
                return False, "Hook must contain a function definition (def or async def)"

            # All hooks must be coroutine functions.
            if not isinstance(func_def, ast.AsyncFunctionDef):
                return False, "Hook function must be async (use 'async def' instead of 'def')"

            func_name = func_def.name

            expected_params = self.HOOK_SIGNATURES.get(hook_point, [])
            if not expected_params:
                return False, f"Unknown hook point: {hook_point}"

            # Count every named parameter, including positional-only and
            # keyword-only ones; `args.args` alone misses both kinds.
            signature = func_def.args
            func_params = {
                arg.arg
                for arg in (*signature.posonlyargs, *signature.args, *signature.kwonlyargs)
            }

            # A **kwargs catch-all can absorb any expected parameter.
            has_kwargs = signature.kwarg is not None

            missing_params = [name for name in expected_params if name not in func_params]
            if missing_params and not has_kwargs:
                return False, f"Hook function '{func_name}' must accept parameters: {', '.join(expected_params)} (missing: {', '.join(missing_params)})"

            # A missing return is only a warning: callers fall back to the
            # original object when a hook returns None.
            if not self._has_own_return(func_def):
                logger.warning(f"Hook function '{func_name}' should return the {expected_params[0]} object")

            return True, "Valid"

        except SyntaxError as e:
            return False, f"Syntax error at line {e.lineno}: {str(e)}"
        except Exception as e:
            return False, f"Failed to parse hook code: {str(e)}"

    def compile_hook(self, hook_code: str, hook_point: str) -> Optional[Callable]:
        """
        Compile user-provided hook code into a callable function

        The code is executed in a fresh namespace with a reduced set of
        builtins and ``asyncio``, ``json``, ``re`` and a few ``typing`` names
        pre-imported. The first public coroutine function defined by the code
        is returned; if the code only defines plain functions, the first one
        is wrapped in a coroutine function.

        Args:
            hook_code: The Python code string
            hook_point: The hook point name

        Returns:
            Compiled function or None if compilation failed (the failure is
            appended to ``self.errors``)
        """
        try:
            import builtins
            import inspect

            allowed_builtins = [
                'print', 'len', 'str', 'int', 'float', 'bool',
                'list', 'dict', 'set', 'tuple', 'range', 'enumerate',
                'zip', 'map', 'filter', 'any', 'all', 'sum', 'min', 'max',
                'sorted', 'reversed', 'abs', 'round', 'isinstance', 'type',
                'getattr', 'hasattr', 'setattr', 'callable', 'iter', 'next',
                '__import__', '__build_class__'  # Required for exec
            ]
            safe_builtins = {
                name: getattr(builtins, name)
                for name in allowed_builtins
                if hasattr(builtins, name)
            }

            namespace = {
                '__name__': f'user_hook_{hook_point}',
                '__builtins__': safe_builtins
            }

            # Add commonly needed imports
            exec("import asyncio", namespace)
            exec("import json", namespace)
            exec("import re", namespace)
            exec("from typing import Dict, List, Optional", namespace)

            # SECURITY NOTE(review): this runs caller-supplied code in-process.
            # The reduced builtins are NOT a sandbox: `__import__` is exposed,
            # so the code can import any module. Only enable this for trusted
            # callers or behind real process-level isolation.
            exec(hook_code, namespace)

            # Only consider functions the user code itself defined. Scanning the
            # whole namespace would also match pre-seeded callables such as
            # typing.Dict, which used to be picked up by the sync fallback.
            user_functions = [
                (name, obj)
                for name, obj in namespace.items()
                if inspect.isfunction(obj)
                and obj.__globals__ is namespace
                and not name.startswith('_')
            ]

            # Prefer the first coroutine function, in definition order.
            for name, obj in user_functions:
                if inspect.iscoroutinefunction(obj):
                    return obj

            # Otherwise wrap the first plain function so it can be awaited.
            if user_functions:
                name, sync_hook = user_functions[0]
                logger.warning(f"Found non-async function '{name}' - wrapping it")

                async def async_wrapper(*args, **kwargs):
                    return sync_hook(*args, **kwargs)

                return async_wrapper

            raise ValueError("No callable function found in hook code")

        except Exception as e:
            error = {
                'hook_point': hook_point,
                'error': f"Failed to compile hook: {str(e)}",
                'type': 'compilation_error',
                'traceback': traceback.format_exc()
            }
            self.errors.append(error)
            logger.error(f"Hook compilation failed for {hook_point}: {str(e)}")
            return None

    async def execute_hook_safely(
        self,
        hook_func: Callable,
        hook_point: str,
        *args,
        **kwargs
    ) -> Tuple[Any, Optional[Dict]]:
        """
        Execute a user hook with error isolation and timeout

        Args:
            hook_func: The compiled hook function
            hook_point: The hook point name
            *args, **kwargs: Arguments to pass to the hook

        Returns:
            Tuple of (result, error_dict). On success error_dict is None. On
            timeout or exception the result is the first positional argument
            (or None when there is none) so the caller can continue.
        """
        # We are inside a coroutine, so a running loop always exists;
        # get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        start_time = loop.time()

        try:
            # The timeout bounds hooks that await forever; it cannot interrupt
            # a hook that blocks the event loop without awaiting.
            result = await asyncio.wait_for(
                hook_func(*args, **kwargs),
                timeout=self.timeout
            )

            execution_time = loop.time() - start_time
            self.execution_log.append({
                'hook_point': hook_point,
                'status': 'success',
                'execution_time': execution_time,
                'timestamp': start_time
            })

            return result, None

        except asyncio.TimeoutError:
            error = {
                'hook_point': hook_point,
                'error': f'Hook execution timed out ({self.timeout}s limit)',
                'type': 'timeout',
                'execution_time': self.timeout
            }
            self.errors.append(error)
            self.execution_log.append({
                'hook_point': hook_point,
                'status': 'timeout',
                'error': error['error'],
                'execution_time': self.timeout,
                'timestamp': start_time
            })
            # Return the first argument (usually page/browser) to continue
            return args[0] if args else None, error

        except Exception as e:
            execution_time = loop.time() - start_time
            error = {
                'hook_point': hook_point,
                'error': str(e),
                'type': type(e).__name__,
                'traceback': traceback.format_exc(),
                'execution_time': execution_time
            }
            self.errors.append(error)
            self.execution_log.append({
                'hook_point': hook_point,
                'status': 'failed',
                'error': str(e),
                'error_type': type(e).__name__,
                'execution_time': execution_time,
                'timestamp': start_time
            })
            # Return the first argument (usually page/browser) to continue
            return args[0] if args else None, error

    def get_summary(self) -> Dict[str, Any]:
        """Get a summary of hook execution

        Returns:
            Counts of executions by status, the success rate as a percentage
            (0 when nothing was executed) and the number of recorded errors.
        """
        total_hooks = len(self.execution_log)
        successful = sum(1 for log in self.execution_log if log['status'] == 'success')
        failed = sum(1 for log in self.execution_log if log['status'] == 'failed')
        timed_out = sum(1 for log in self.execution_log if log['status'] == 'timeout')

        return {
            'total_executions': total_hooks,
            'successful': successful,
            'failed': failed,
            'timed_out': timed_out,
            'success_rate': (successful / total_hooks * 100) if total_hooks > 0 else 0,
            'total_errors': len(self.errors)
        }
|
||||||
|
|
||||||
|
|
||||||
|
class IsolatedHookWrapper:
    """Wraps user hooks with error isolation and reporting"""

    def __init__(self, hook_manager: UserHookManager):
        # The manager performs the guarded execution and records errors/logs.
        self.hook_manager = hook_manager

    def create_hook_wrapper(self, user_hook: Callable, hook_point: str) -> Callable:
        """
        Build a coroutine function that runs *user_hook* without letting its
        failures propagate.

        Args:
            user_hook: The compiled user hook function
            hook_point: The hook point name

        Returns:
            An async function that returns the hook's result, or the original
            object (first positional argument, else the 'page' or 'browser'
            keyword argument, else None) when the hook fails or returns None.
        """

        async def wrapped_hook(*args, **kwargs):
            """Wrapped hook with error isolation"""
            # Work out what to hand back if the hook gives us nothing usable.
            if args:
                fallback = args[0]
            elif 'page' in kwargs:
                fallback = kwargs['page']
            else:
                fallback = kwargs.get('browser')

            try:
                outcome, failure = await self.hook_manager.execute_hook_safely(
                    user_hook, hook_point, *args, **kwargs
                )

                if failure:
                    # Hook failed but we continue with original object
                    logger.warning(f"User hook failed at {hook_point}: {failure['error']}")
                elif outcome is None:
                    logger.debug(f"Hook at {hook_point} returned None, using original object")
                else:
                    return outcome

                return fallback

            except Exception as e:
                # execute_hook_safely already traps hook errors, so this is a
                # last line of defence.
                logger.error(f"Unexpected error in hook wrapper for {hook_point}: {e}")
                return fallback

        # Give the wrapper a recognisable name for debugging.
        wrapped_hook.__name__ = f"wrapped_{hook_point}"
        return wrapped_hook
|
||||||
|
|
||||||
|
|
||||||
|
async def process_user_hooks(
    hooks_input: Dict[str, str],
    timeout: int = 30
) -> Tuple[Dict[str, Callable], List[Dict], UserHookManager]:
    """
    Process and compile user-provided hook functions

    Creates a fresh UserHookManager and delegates validation, compilation and
    wrapping to process_user_hooks_with_manager, so both entry points share a
    single implementation instead of two copies of the same loop.

    Args:
        hooks_input: Dictionary mapping hook points to code strings
        timeout: Timeout for each hook execution

    Returns:
        Tuple of (compiled_hooks, validation_errors, hook_manager)
    """
    hook_manager = UserHookManager(timeout=timeout)
    compiled_hooks, validation_errors = await process_user_hooks_with_manager(
        hooks_input, hook_manager
    )
    return compiled_hooks, validation_errors, hook_manager
|
||||||
|
|
||||||
|
|
||||||
|
async def process_user_hooks_with_manager(
    hooks_input: Dict[str, str],
    hook_manager: UserHookManager
) -> Tuple[Dict[str, Callable], List[Dict]]:
    """
    Validate, compile and wrap user hooks using an existing manager.

    Empty or whitespace-only code strings are skipped silently. Unknown hook
    points, structurally invalid code and compilation failures are reported
    as validation errors rather than raised.

    Args:
        hooks_input: Dictionary mapping hook points to code strings
        hook_manager: Existing UserHookManager instance

    Returns:
        Tuple of (compiled_hooks, validation_errors)
    """
    isolator = IsolatedHookWrapper(hook_manager)
    compiled_hooks = {}
    validation_errors = []

    def report(point, message, code):
        # Keep error payloads small: show at most the first 100 characters.
        preview = code[:100] + '...' if len(code) > 100 else code
        validation_errors.append({
            'hook_point': point,
            'error': message,
            'code_preview': preview
        })

    for hook_point, hook_code in hooks_input.items():
        # Nothing to do for empty hooks.
        if not (hook_code and hook_code.strip()):
            continue

        # The hook point must be one the manager knows about.
        if hook_point not in UserHookManager.HOOK_SIGNATURES:
            valid_points = ", ".join(UserHookManager.HOOK_SIGNATURES.keys())
            report(hook_point, f'Unknown hook point. Valid points: {valid_points}', hook_code)
            continue

        # Static structure check before anything is executed.
        is_valid, message = hook_manager.validate_hook_structure(hook_code, hook_point)
        if not is_valid:
            report(hook_point, message, hook_code)
            continue

        hook_func = hook_manager.compile_hook(hook_code, hook_point)
        if not hook_func:
            report(
                hook_point,
                'Failed to compile hook function - check syntax and structure',
                hook_code,
            )
            continue

        # Wrap with error isolation before handing it to the crawler.
        compiled_hooks[hook_point] = isolator.create_hook_wrapper(hook_func, hook_point)
        logger.info(f"Successfully compiled hook for {hook_point}")

    return compiled_hooks, validation_errors
|
||||||
|
|
||||||
|
|
||||||
|
async def attach_user_hooks_to_crawler(
    crawler,  # AsyncWebCrawler instance
    user_hooks: Dict[str, str],
    timeout: int = 30,
    hook_manager: Optional[UserHookManager] = None
) -> Tuple[Dict[str, Any], UserHookManager]:
    """
    Compile the user's hooks and register them on the crawler's strategy.

    Args:
        crawler: AsyncWebCrawler instance
        user_hooks: Dictionary mapping hook points to code strings
        timeout: Timeout for each hook execution; only used when a new
            manager has to be created
        hook_manager: Optional existing UserHookManager instance

    Returns:
        Tuple of (status_dict, hook_manager). The status is 'success' when no
        errors were collected, 'partial' when some hooks were attached despite
        errors, and 'failed' otherwise.
    """
    # Reuse the caller's manager when given, otherwise build one.
    if hook_manager is None:
        hook_manager = UserHookManager(timeout=timeout)

    compiled_hooks, validation_errors = await process_user_hooks_with_manager(
        user_hooks, hook_manager
    )

    if validation_errors:
        logger.warning(f"Hook validation errors: {validation_errors}")

    # Register each compiled hook; a failure to attach is reported alongside
    # the validation errors instead of aborting the remaining hooks.
    attached_hooks = []
    for hook_point, wrapped_hook in compiled_hooks.items():
        try:
            crawler.crawler_strategy.set_hook(hook_point, wrapped_hook)
            attached_hooks.append(hook_point)
            logger.info(f"Attached hook to {hook_point}")
        except Exception as e:
            logger.error(f"Failed to attach hook to {hook_point}: {e}")
            validation_errors.append({
                'hook_point': hook_point,
                'error': f'Failed to attach hook: {str(e)}'
            })

    if not validation_errors:
        status = 'success'
    elif attached_hooks:
        status = 'partial'
    else:
        status = 'failed'

    status_dict = {
        'status': status,
        'attached_hooks': attached_hooks,
        'validation_errors': validation_errors,
        'total_hooks_provided': len(user_hooks),
        'successfully_attached': len(attached_hooks),
        'failed_validation': len(validation_errors)
    }
    return status_dict, hook_manager
|
||||||
@@ -12,6 +12,7 @@ from api import (
|
|||||||
handle_crawl_job,
|
handle_crawl_job,
|
||||||
handle_task_status,
|
handle_task_status,
|
||||||
)
|
)
|
||||||
|
from schemas import WebhookConfig
|
||||||
|
|
||||||
# ------------- dependency placeholders -------------
|
# ------------- dependency placeholders -------------
|
||||||
_redis = None # will be injected from server.py
|
_redis = None # will be injected from server.py
|
||||||
@@ -37,6 +38,7 @@ class LlmJobPayload(BaseModel):
|
|||||||
schema: Optional[str] = None
|
schema: Optional[str] = None
|
||||||
cache: bool = False
|
cache: bool = False
|
||||||
provider: Optional[str] = None
|
provider: Optional[str] = None
|
||||||
|
webhook_config: Optional[WebhookConfig] = None
|
||||||
temperature: Optional[float] = None
|
temperature: Optional[float] = None
|
||||||
base_url: Optional[str] = None
|
base_url: Optional[str] = None
|
||||||
|
|
||||||
@@ -45,6 +47,7 @@ class CrawlJobPayload(BaseModel):
|
|||||||
urls: list[HttpUrl]
|
urls: list[HttpUrl]
|
||||||
browser_config: Dict = {}
|
browser_config: Dict = {}
|
||||||
crawler_config: Dict = {}
|
crawler_config: Dict = {}
|
||||||
|
webhook_config: Optional[WebhookConfig] = None
|
||||||
|
|
||||||
|
|
||||||
# ---------- LLM job ---------------------------------------------------------
|
# ---------- LLM job ---------------------------------------------------------
|
||||||
@@ -55,6 +58,10 @@ async def llm_job_enqueue(
|
|||||||
request: Request,
|
request: Request,
|
||||||
_td: Dict = Depends(lambda: _token_dep()), # late-bound dep
|
_td: Dict = Depends(lambda: _token_dep()), # late-bound dep
|
||||||
):
|
):
|
||||||
|
webhook_config = None
|
||||||
|
if payload.webhook_config:
|
||||||
|
webhook_config = payload.webhook_config.model_dump(mode='json')
|
||||||
|
|
||||||
return await handle_llm_request(
|
return await handle_llm_request(
|
||||||
_redis,
|
_redis,
|
||||||
background_tasks,
|
background_tasks,
|
||||||
@@ -65,6 +72,7 @@ async def llm_job_enqueue(
|
|||||||
cache=payload.cache,
|
cache=payload.cache,
|
||||||
config=_config,
|
config=_config,
|
||||||
provider=payload.provider,
|
provider=payload.provider,
|
||||||
|
webhook_config=webhook_config,
|
||||||
temperature=payload.temperature,
|
temperature=payload.temperature,
|
||||||
api_base_url=payload.base_url,
|
api_base_url=payload.base_url,
|
||||||
)
|
)
|
||||||
@@ -86,6 +94,10 @@ async def crawl_job_enqueue(
|
|||||||
background_tasks: BackgroundTasks,
|
background_tasks: BackgroundTasks,
|
||||||
_td: Dict = Depends(lambda: _token_dep()),
|
_td: Dict = Depends(lambda: _token_dep()),
|
||||||
):
|
):
|
||||||
|
webhook_config = None
|
||||||
|
if payload.webhook_config:
|
||||||
|
webhook_config = payload.webhook_config.model_dump(mode='json')
|
||||||
|
|
||||||
return await handle_crawl_job(
|
return await handle_crawl_job(
|
||||||
_redis,
|
_redis,
|
||||||
background_tasks,
|
background_tasks,
|
||||||
@@ -93,6 +105,7 @@ async def crawl_job_enqueue(
|
|||||||
payload.browser_config,
|
payload.browser_config,
|
||||||
payload.crawler_config,
|
payload.crawler_config,
|
||||||
config=_config,
|
config=_config,
|
||||||
|
webhook_config=webhook_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,6 @@ pydantic>=2.11
|
|||||||
rank-bm25==0.2.2
|
rank-bm25==0.2.2
|
||||||
anyio==4.9.0
|
anyio==4.9.0
|
||||||
PyJWT==2.10.1
|
PyJWT==2.10.1
|
||||||
mcp>=1.6.0
|
mcp>=1.18.0
|
||||||
websockets>=15.0.1
|
websockets>=15.0.1
|
||||||
httpx[http2]>=0.27.2
|
httpx[http2]>=0.27.2
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from typing import List, Optional, Dict
|
from typing import List, Optional, Dict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field, HttpUrl
|
||||||
from utils import FilterType
|
from utils import FilterType
|
||||||
|
|
||||||
|
|
||||||
@@ -9,6 +9,50 @@ class CrawlRequest(BaseModel):
|
|||||||
browser_config: Optional[Dict] = Field(default_factory=dict)
|
browser_config: Optional[Dict] = Field(default_factory=dict)
|
||||||
crawler_config: Optional[Dict] = Field(default_factory=dict)
|
crawler_config: Optional[Dict] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class HookConfig(BaseModel):
    """Configuration for user-provided hooks"""
    # Hook point name -> source code of the hook function.
    code: Dict[str, str] = Field(
        default_factory=dict,
        description="Map of hook points to Python code strings"
    )
    # Per-hook execution limit, constrained to 1..120 seconds.
    timeout: int = Field(
        default=30,
        ge=1,
        le=120,
        description="Timeout in seconds for each hook execution"
    )

    # Pydantic v2 configuration: the v1 `class Config: schema_extra` key was
    # renamed to `json_schema_extra`, so the example is declared here to keep
    # it in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "code": {
                    "on_page_context_created": """
async def hook(page, context, **kwargs):
    # Block images to speed up crawling
    await context.route("**/*.{png,jpg,jpeg,gif}", lambda route: route.abort())
    return page
""",
                    "before_retrieve_html": """
async def hook(page, context, **kwargs):
    # Scroll to load lazy content
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(2000)
    return page
"""
                },
                "timeout": 30
            }
        }
    }
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlRequestWithHooks(CrawlRequest):
    """Extended crawl request with hooks support"""
    # Absent (None) means the request carries no user hooks.
    hooks: Optional[HookConfig] = Field(
        None,
        description="Optional user-provided hook functions",
    )
|
||||||
|
|
||||||
class MarkdownRequest(BaseModel):
|
class MarkdownRequest(BaseModel):
|
||||||
"""Request body for the /md endpoint."""
|
"""Request body for the /md endpoint."""
|
||||||
url: str = Field(..., description="Absolute http/https URL to fetch")
|
url: str = Field(..., description="Absolute http/https URL to fetch")
|
||||||
@@ -41,4 +85,22 @@ class JSEndpointRequest(BaseModel):
|
|||||||
scripts: List[str] = Field(
|
scripts: List[str] = Field(
|
||||||
...,
|
...,
|
||||||
description="List of separated JavaScript snippets to execute"
|
description="List of separated JavaScript snippets to execute"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WebhookConfig(BaseModel):
    """Configuration for webhook notifications."""
    # Required destination URL; validated as an http/https URL by HttpUrl.
    webhook_url: HttpUrl
    # Defaults to False when the client does not set it.
    webhook_data_in_payload: bool = Field(default=False)
    # Optional string-to-string header mapping.
    webhook_headers: Optional[Dict[str, str]] = Field(default=None)
|
||||||
|
|
||||||
|
|
||||||
|
class WebhookPayload(BaseModel):
    """Payload sent to webhook endpoints."""
    task_id: str
    # "crawl", "llm_extraction", etc.
    task_type: str
    # "completed" or "failed"
    status: str
    # ISO 8601 format
    timestamp: str
    urls: List[str]
    error: Optional[str] = Field(default=None)
    # Included only if webhook_data_in_payload=True
    data: Optional[Dict] = Field(default=None)
|
||||||
@@ -23,7 +23,7 @@ from api import (
|
|||||||
stream_results
|
stream_results
|
||||||
)
|
)
|
||||||
from schemas import (
|
from schemas import (
|
||||||
CrawlRequest,
|
CrawlRequestWithHooks,
|
||||||
MarkdownRequest,
|
MarkdownRequest,
|
||||||
RawCode,
|
RawCode,
|
||||||
HTMLRequest,
|
HTMLRequest,
|
||||||
@@ -462,6 +462,72 @@ async def get_schema():
|
|||||||
"crawler": CrawlerRunConfig().dump()}
|
"crawler": CrawlerRunConfig().dump()}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/hooks/info")
async def get_hooks_info():
    """Get information about available hook points and their signatures"""
    from hook_manager import UserHookManager

    # One entry per hook point: its parameters plus a description and example.
    available_hooks = {
        hook_point: {
            "parameters": params,
            "description": get_hook_description(hook_point),
            "example": get_hook_example(hook_point)
        }
        for hook_point, params in UserHookManager.HOOK_SIGNATURES.items()
    }

    # Timeout bounds advertised to clients, in seconds.
    timeout_limits = {
        "min": 1,
        "max": 120,
        "default": 30
    }

    return JSONResponse({
        "available_hooks": available_hooks,
        "timeout_limits": timeout_limits
    })
|
||||||
|
|
||||||
|
|
||||||
|
def get_hook_description(hook_point: str) -> str:
    """Return the human-readable description of a hook point.

    Args:
        hook_point: The hook point name.

    Returns:
        The description, or an empty string for an unknown hook point.
    """
    catalogue = dict(
        on_browser_created="Called after browser instance is created",
        on_page_context_created="Called after page and context are created - ideal for authentication",
        before_goto="Called before navigating to the target URL",
        after_goto="Called after navigation is complete",
        on_user_agent_updated="Called when user agent is updated",
        on_execution_started="Called when custom JavaScript execution begins",
        before_retrieve_html="Called before retrieving the final HTML - ideal for scrolling",
        before_return_html="Called just before returning the HTML content",
    )
    try:
        return catalogue[hook_point]
    except KeyError:
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_hook_example(hook_point: str) -> str:
    """Get example code for each hook point

    Every example is a complete ``async def`` whose parameters match the hook
    point, so it can be submitted as-is. Previously hook points without a
    dedicated example got a bare ``return page`` snippet, which is not a
    function definition and was rejected by hook validation.

    Args:
        hook_point: The hook point name.

    Returns:
        Example source code for the hook point, or a generic async hook
        skeleton for an unknown hook point.
    """
    examples = {
        "on_browser_created": """async def hook(browser, **kwargs):
    # Inspect or configure the newly created browser
    return browser""",

        "on_page_context_created": """async def hook(page, context, **kwargs):
    # Add authentication cookie
    await context.add_cookies([{
        'name': 'session',
        'value': 'my-session-id',
        'domain': '.example.com'
    }])
    return page""",

        "before_goto": """async def hook(page, context, url, **kwargs):
    # Set custom headers
    await page.set_extra_http_headers({
        'X-Custom-Header': 'value'
    })
    return page""",

        "after_goto": """async def hook(page, context, url, response, **kwargs):
    # Runs once navigation to the URL has completed
    print(f"Loaded {url}")
    return page""",

        "on_user_agent_updated": """async def hook(page, context, user_agent, **kwargs):
    # React to the updated user agent
    print(f"User agent updated: {user_agent}")
    return page""",

        "on_execution_started": """async def hook(page, context, **kwargs):
    # Runs when custom JavaScript execution begins
    return page""",

        "before_retrieve_html": """async def hook(page, context, **kwargs):
    # Scroll to load lazy content
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(2000)
    return page""",

        "before_return_html": """async def hook(page, context, html, **kwargs):
    # Inspect the final HTML before it is returned
    print(f"HTML length: {len(html)}")
    return page"""
    }
    # Generic skeleton for names we have no dedicated example for.
    fallback = (
        "async def hook(*args, **kwargs):\n"
        "    # Implement your hook logic here\n"
        "    return args[0] if args else None"
    )
    return examples.get(hook_point, fallback)
|
||||||
|
|
||||||
|
|
||||||
@app.get(config["observability"]["health_check"]["endpoint"])
|
@app.get(config["observability"]["health_check"]["endpoint"])
|
||||||
async def health():
|
async def health():
|
||||||
return {"status": "ok", "timestamp": time.time(), "version": __version__}
|
return {"status": "ok", "timestamp": time.time(), "version": __version__}
|
||||||
@@ -477,12 +543,13 @@ async def metrics():
|
|||||||
@mcp_tool("crawl")
|
@mcp_tool("crawl")
|
||||||
async def crawl(
|
async def crawl(
|
||||||
request: Request,
|
request: Request,
|
||||||
crawl_request: CrawlRequest,
|
crawl_request: CrawlRequestWithHooks,
|
||||||
_td: Dict = Depends(token_dep),
|
_td: Dict = Depends(token_dep),
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Crawl a list of URLs and return the results as JSON.
|
Crawl a list of URLs and return the results as JSON.
|
||||||
For streaming responses, use /crawl/stream endpoint.
|
For streaming responses, use /crawl/stream endpoint.
|
||||||
|
Supports optional user-provided hook functions for customization.
|
||||||
"""
|
"""
|
||||||
if not crawl_request.urls:
|
if not crawl_request.urls:
|
||||||
raise HTTPException(400, "At least one URL required")
|
raise HTTPException(400, "At least one URL required")
|
||||||
@@ -490,11 +557,21 @@ async def crawl(
|
|||||||
crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
|
crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
|
||||||
if crawler_config.stream:
|
if crawler_config.stream:
|
||||||
return await stream_process(crawl_request=crawl_request)
|
return await stream_process(crawl_request=crawl_request)
|
||||||
|
|
||||||
|
# Prepare hooks config if provided
|
||||||
|
hooks_config = None
|
||||||
|
if crawl_request.hooks:
|
||||||
|
hooks_config = {
|
||||||
|
'code': crawl_request.hooks.code,
|
||||||
|
'timeout': crawl_request.hooks.timeout
|
||||||
|
}
|
||||||
|
|
||||||
results = await handle_crawl_request(
|
results = await handle_crawl_request(
|
||||||
urls=crawl_request.urls,
|
urls=crawl_request.urls,
|
||||||
browser_config=crawl_request.browser_config,
|
browser_config=crawl_request.browser_config,
|
||||||
crawler_config=crawl_request.crawler_config,
|
crawler_config=crawl_request.crawler_config,
|
||||||
config=config,
|
config=config,
|
||||||
|
hooks_config=hooks_config
|
||||||
)
|
)
|
||||||
# check if all of the results are not successful
|
# check if all of the results are not successful
|
||||||
if all(not result["success"] for result in results["results"]):
|
if all(not result["success"] for result in results["results"]):
|
||||||
@@ -506,7 +583,7 @@ async def crawl(
|
|||||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||||
async def crawl_stream(
|
async def crawl_stream(
|
||||||
request: Request,
|
request: Request,
|
||||||
crawl_request: CrawlRequest,
|
crawl_request: CrawlRequestWithHooks,
|
||||||
_td: Dict = Depends(token_dep),
|
_td: Dict = Depends(token_dep),
|
||||||
):
|
):
|
||||||
if not crawl_request.urls:
|
if not crawl_request.urls:
|
||||||
@@ -514,21 +591,38 @@ async def crawl_stream(
|
|||||||
|
|
||||||
return await stream_process(crawl_request=crawl_request)
|
return await stream_process(crawl_request=crawl_request)
|
||||||
|
|
||||||
async def stream_process(crawl_request: CrawlRequest):
|
async def stream_process(crawl_request: CrawlRequestWithHooks):
|
||||||
crawler, gen = await handle_stream_crawl_request(
|
|
||||||
|
# Prepare hooks config if provided
|
||||||
|
hooks_config = None
|
||||||
|
if crawl_request.hooks:
|
||||||
|
hooks_config = {
|
||||||
|
'code': crawl_request.hooks.code,
|
||||||
|
'timeout': crawl_request.hooks.timeout
|
||||||
|
}
|
||||||
|
|
||||||
|
crawler, gen, hooks_info = await handle_stream_crawl_request(
|
||||||
urls=crawl_request.urls,
|
urls=crawl_request.urls,
|
||||||
browser_config=crawl_request.browser_config,
|
browser_config=crawl_request.browser_config,
|
||||||
crawler_config=crawl_request.crawler_config,
|
crawler_config=crawl_request.crawler_config,
|
||||||
config=config,
|
config=config,
|
||||||
)
|
hooks_config=hooks_config
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add hooks info to response headers if available
|
||||||
|
headers = {
|
||||||
|
"Cache-Control": "no-cache",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"X-Stream-Status": "active",
|
||||||
|
}
|
||||||
|
if hooks_info:
|
||||||
|
import json
|
||||||
|
headers["X-Hooks-Status"] = json.dumps(hooks_info['status']['status'])
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
stream_results(crawler, gen),
|
stream_results(crawler, gen),
|
||||||
media_type="application/x-ndjson",
|
media_type="application/x-ndjson",
|
||||||
headers={
|
headers=headers,
|
||||||
"Cache-Control": "no-cache",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"X-Stream-Status": "active",
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
159
deploy/docker/webhook.py
Normal file
159
deploy/docker/webhook.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
"""
|
||||||
|
Webhook delivery service for Crawl4AI.
|
||||||
|
|
||||||
|
This module provides webhook notification functionality with exponential backoff retry logic.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import httpx
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Optional
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class WebhookDeliveryService:
    """Deliver webhook notifications with exponential-backoff retries.

    Settings are read from the ``"webhooks"`` section of the application
    configuration: ``retry`` (``max_attempts``, ``initial_delay_ms``,
    ``max_delay_ms``, ``timeout_ms``), ``headers``, ``default_url``,
    ``data_in_payload`` and ``enabled``.
    """

    def __init__(self, config: Dict):
        """
        Initialize the webhook delivery service.

        Args:
            config: Application configuration dictionary containing webhook settings
        """
        # ``or {}`` (instead of a ``.get`` default) also covers sections that
        # are present but null, e.g. an empty ``retry:`` key in a YAML file,
        # which would otherwise raise AttributeError on the next ``.get``.
        self.config = (config or {}).get("webhooks") or {}
        retry = self.config.get("retry") or {}

        self.max_attempts = retry.get("max_attempts", 5)
        # Configuration is expressed in milliseconds; asyncio/httpx want seconds.
        self.initial_delay = retry.get("initial_delay_ms", 1000) / 1000
        self.max_delay = retry.get("max_delay_ms", 32000) / 1000
        self.timeout = retry.get("timeout_ms", 30000) / 1000

    def _backoff_delay(self, attempt: int) -> float:
        """Return the wait in seconds after zero-based ``attempt`` has failed."""
        return min(self.initial_delay * (2 ** attempt), self.max_delay)

    async def send_webhook(
        self,
        webhook_url: str,
        payload: Dict,
        headers: Optional[Dict[str, str]] = None
    ) -> bool:
        """
        Send webhook with exponential backoff retry logic.

        Responses with a status below 500 are final: 2xx counts as delivered,
        anything else is treated as a rejection and is not retried. Responses
        with status 500 and above, timeouts and request errors are retried
        until ``max_attempts`` is reached.

        Args:
            webhook_url: The URL to send the webhook to
            payload: The JSON payload to send
            headers: Optional custom headers

        Returns:
            bool: True if delivered successfully, False otherwise
        """
        # Per-request headers win over configured defaults; Content-Type is
        # always forced to JSON because the body is sent with ``json=``.
        default_headers = self.config.get("headers") or {}
        merged_headers = {**default_headers, **(headers or {})}
        merged_headers["Content-Type"] = "application/json"

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            for attempt in range(self.max_attempts):
                try:
                    logger.info(
                        f"Sending webhook (attempt {attempt + 1}/{self.max_attempts}) to {webhook_url}"
                    )

                    response = await client.post(
                        webhook_url,
                        json=payload,
                        headers=merged_headers
                    )

                    # Success or client error (don't retry client errors)
                    if response.status_code < 500:
                        if 200 <= response.status_code < 300:
                            logger.info(f"Webhook delivered successfully to {webhook_url}")
                            return True

                        logger.warning(
                            f"Webhook rejected with status {response.status_code}: {response.text[:200]}"
                        )
                        return False  # Client error - don't retry

                    # Server error - retry with backoff
                    logger.warning(
                        f"Webhook failed with status {response.status_code}, will retry"
                    )

                except httpx.TimeoutException as exc:
                    logger.error(f"Webhook timeout (attempt {attempt + 1}): {exc}")
                except httpx.RequestError as exc:
                    logger.error(f"Webhook request error (attempt {attempt + 1}): {exc}")
                except Exception as exc:
                    # Deliberate catch-all: a failed notification is logged
                    # and retried rather than raised to the caller.
                    logger.error(f"Webhook delivery error (attempt {attempt + 1}): {exc}")

                # No sleep after the final attempt - there is nothing left to retry.
                if attempt < self.max_attempts - 1:
                    delay = self._backoff_delay(attempt)
                    logger.info(f"Retrying in {delay}s...")
                    await asyncio.sleep(delay)

        logger.error(
            f"Webhook delivery failed after {self.max_attempts} attempts to {webhook_url}"
        )
        return False

    async def notify_job_completion(
        self,
        task_id: str,
        task_type: str,
        status: str,
        urls: list,
        webhook_config: Optional[Dict],
        result: Optional[Dict] = None,
        error: Optional[str] = None
    ) -> None:
        """
        Notify webhook of job completion.

        The target URL comes from ``webhook_config["webhook_url"]`` and falls
        back to the configured ``default_url``. Nothing is sent when no URL is
        available or when webhooks are disabled in the configuration.

        Args:
            task_id: The task identifier
            task_type: Type of task (e.g., "crawl", "llm_extraction")
            status: Task status ("completed" or "failed")
            urls: List of URLs that were crawled
            webhook_config: Webhook configuration from the job request
            result: Optional crawl result data
            error: Optional error message if failed
        """
        # Start from the global defaults; per-job settings override them below.
        webhook_url = None
        data_in_payload = self.config.get("data_in_payload", False)
        custom_headers = None

        if webhook_config:
            webhook_url = webhook_config.get("webhook_url")
            data_in_payload = webhook_config.get("webhook_data_in_payload", data_in_payload)
            custom_headers = webhook_config.get("webhook_headers")

        if not webhook_url:
            webhook_url = self.config.get("default_url")

        if not webhook_url:
            logger.debug("No webhook URL configured, skipping notification")
            return

        # Check if webhooks are enabled
        if not self.config.get("enabled", True):
            logger.debug("Webhooks are disabled, skipping notification")
            return

        # Build payload
        payload = {
            "task_id": task_id,
            "task_type": task_type,
            "status": status,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "urls": urls
        }

        if error:
            payload["error"] = error

        # Result data is attached only when requested and actually available.
        if data_in_payload and result:
            payload["data"] = result

        # Delivery (including every retry and backoff sleep) is awaited here,
        # and the boolean outcome is intentionally discarded. Callers that
        # must not block should schedule this coroutine as a background task.
        await self.send_webhook(webhook_url, payload, custom_headers)
|
||||||
@@ -10,7 +10,6 @@ Today I'm releasing Crawl4AI v0.7.4—the Intelligent Table Extraction & Perform
|
|||||||
|
|
||||||
- **🚀 LLMTableExtraction**: Revolutionary table extraction with intelligent chunking for massive tables
|
- **🚀 LLMTableExtraction**: Revolutionary table extraction with intelligent chunking for massive tables
|
||||||
- **⚡ Enhanced Concurrency**: True concurrency improvements for fast-completing tasks in batch operations
|
- **⚡ Enhanced Concurrency**: True concurrency improvements for fast-completing tasks in batch operations
|
||||||
- **🧹 Memory Management Refactor**: Streamlined memory utilities and better resource management
|
|
||||||
- **🔧 Browser Manager Fixes**: Resolved race conditions in concurrent page creation
|
- **🔧 Browser Manager Fixes**: Resolved race conditions in concurrent page creation
|
||||||
- **⌨️ Cross-Platform Browser Profiler**: Improved keyboard handling and quit mechanisms
|
- **⌨️ Cross-Platform Browser Profiler**: Improved keyboard handling and quit mechanisms
|
||||||
- **🔗 Advanced URL Processing**: Better handling of raw URLs and base tag link resolution
|
- **🔗 Advanced URL Processing**: Better handling of raw URLs and base tag link resolution
|
||||||
@@ -158,40 +157,6 @@ async with AsyncWebCrawler() as crawler:
|
|||||||
- **Monitoring Systems**: Faster health checks and status page monitoring
|
- **Monitoring Systems**: Faster health checks and status page monitoring
|
||||||
- **Data Aggregation**: Improved performance for real-time data collection
|
- **Data Aggregation**: Improved performance for real-time data collection
|
||||||
|
|
||||||
## 🧹 Memory Management Refactor: Cleaner Architecture
|
|
||||||
|
|
||||||
**The Problem:** Memory utilities were scattered and difficult to maintain, with potential import conflicts and unclear organization.
|
|
||||||
|
|
||||||
**My Solution:** I consolidated all memory-related utilities into the main `utils.py` module, creating a cleaner, more maintainable architecture.
|
|
||||||
|
|
||||||
### Improved Memory Handling
|
|
||||||
|
|
||||||
```python
|
|
||||||
# All memory utilities now consolidated
|
|
||||||
from crawl4ai.utils import get_true_memory_usage_percent, MemoryMonitor
|
|
||||||
|
|
||||||
# Enhanced memory monitoring
|
|
||||||
monitor = MemoryMonitor()
|
|
||||||
monitor.start_monitoring()
|
|
||||||
|
|
||||||
async with AsyncWebCrawler() as crawler:
|
|
||||||
# Memory-efficient batch processing
|
|
||||||
results = await crawler.arun_many(large_url_list)
|
|
||||||
|
|
||||||
# Get accurate memory metrics
|
|
||||||
memory_usage = get_true_memory_usage_percent()
|
|
||||||
memory_report = monitor.get_report()
|
|
||||||
|
|
||||||
print(f"Memory efficiency: {memory_report['efficiency']:.1f}%")
|
|
||||||
print(f"Peak usage: {memory_report['peak_mb']:.1f} MB")
|
|
||||||
```
|
|
||||||
|
|
||||||
**Expected Real-World Impact:**
|
|
||||||
- **Production Stability**: More reliable memory tracking and management
|
|
||||||
- **Code Maintainability**: Cleaner architecture for easier debugging
|
|
||||||
- **Import Clarity**: Resolved potential conflicts and import issues
|
|
||||||
- **Developer Experience**: Simpler API for memory monitoring
|
|
||||||
|
|
||||||
## 🔧 Critical Stability Fixes
|
## 🔧 Critical Stability Fixes
|
||||||
|
|
||||||
### Browser Manager Race Condition Resolution
|
### Browser Manager Race Condition Resolution
|
||||||
|
|||||||
318
docs/blog/release-v0.7.5.md
Normal file
318
docs/blog/release-v0.7.5.md
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
# 🚀 Crawl4AI v0.7.5: The Docker Hooks & Security Update
|
||||||
|
|
||||||
|
*September 29, 2025 • 8 min read*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Today I'm releasing Crawl4AI v0.7.5—focused on extensibility and security. This update introduces the Docker Hooks System for pipeline customization, enhanced LLM integration, and important security improvements.
|
||||||
|
|
||||||
|
## 🎯 What's New at a Glance
|
||||||
|
|
||||||
|
- **Docker Hooks System**: Custom Python functions at key pipeline points with function-based API
|
||||||
|
- **Function-Based Hooks**: New `hooks_to_string()` utility with Docker client auto-conversion
|
||||||
|
- **Enhanced LLM Integration**: Custom providers with temperature control
|
||||||
|
- **HTTPS Preservation**: Secure internal link handling
|
||||||
|
- **Bug Fixes**: Resolved multiple community-reported issues
|
||||||
|
- **Improved Docker Error Handling**: Better debugging and reliability
|
||||||
|
|
||||||
|
## 🔧 Docker Hooks System: Pipeline Customization
|
||||||
|
|
||||||
|
Every scraping project needs custom logic—authentication, performance optimization, content processing. Traditional solutions require forking or complex workarounds. Docker Hooks let you inject custom Python functions at 8 key points in the crawling pipeline.
|
||||||
|
|
||||||
|
### Real Example: Authentication & Performance
|
||||||
|
|
||||||
|
```python
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Real working hooks for httpbin.org
|
||||||
|
hooks_config = {
|
||||||
|
"on_page_context_created": """
|
||||||
|
async def hook(page, context, **kwargs):
|
||||||
|
print("Hook: Setting up page context")
|
||||||
|
# Block images to speed up crawling
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
print("Hook: Images blocked")
|
||||||
|
return page
|
||||||
|
""",
|
||||||
|
|
||||||
|
"before_retrieve_html": """
|
||||||
|
async def hook(page, context, **kwargs):
|
||||||
|
print("Hook: Before retrieving HTML")
|
||||||
|
# Scroll to bottom to load lazy content
|
||||||
|
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||||
|
await page.wait_for_timeout(1000)
|
||||||
|
print("Hook: Scrolled to bottom")
|
||||||
|
return page
|
||||||
|
""",
|
||||||
|
|
||||||
|
"before_goto": """
|
||||||
|
async def hook(page, context, url, **kwargs):
|
||||||
|
print(f"Hook: About to navigate to {url}")
|
||||||
|
# Add custom headers
|
||||||
|
await page.set_extra_http_headers({
|
||||||
|
'X-Test-Header': 'crawl4ai-hooks-test'
|
||||||
|
})
|
||||||
|
return page
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
|
||||||
|
# Test with Docker API
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://httpbin.org/html"],
|
||||||
|
"hooks": {
|
||||||
|
"code": hooks_config,
|
||||||
|
"timeout": 30
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post("http://localhost:11235/crawl", json=payload)
|
||||||
|
result = response.json()
|
||||||
|
|
||||||
|
if result.get('success'):
|
||||||
|
print("✅ Hooks executed successfully!")
|
||||||
|
print(f"Content length: {len(result.get('markdown', ''))} characters")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Available Hook Points:**
|
||||||
|
- `on_browser_created`: Browser setup
|
||||||
|
- `on_page_context_created`: Page context configuration
|
||||||
|
- `before_goto`: Pre-navigation setup
|
||||||
|
- `after_goto`: Post-navigation processing
|
||||||
|
- `on_user_agent_updated`: User agent changes
|
||||||
|
- `on_execution_started`: Crawl initialization
|
||||||
|
- `before_retrieve_html`: Pre-extraction processing
|
||||||
|
- `before_return_html`: Final HTML processing
|
||||||
|
|
||||||
|
### Function-Based Hooks API
|
||||||
|
|
||||||
|
Writing hooks as strings works, but lacks IDE support and type checking. v0.7.5 introduces a function-based approach with automatic conversion!
|
||||||
|
|
||||||
|
**Option 1: Using the `hooks_to_string()` Utility**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai import hooks_to_string
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Define hooks as regular Python functions (with full IDE support!)
|
||||||
|
async def on_page_context_created(page, context, **kwargs):
|
||||||
|
"""Block images to speed up crawling"""
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
await page.set_viewport_size({"width": 1920, "height": 1080})
|
||||||
|
return page
|
||||||
|
|
||||||
|
async def before_goto(page, context, url, **kwargs):
|
||||||
|
"""Add custom headers"""
|
||||||
|
await page.set_extra_http_headers({
|
||||||
|
'X-Crawl4AI': 'v0.7.5',
|
||||||
|
'X-Custom-Header': 'my-value'
|
||||||
|
})
|
||||||
|
return page
|
||||||
|
|
||||||
|
# Convert functions to strings
|
||||||
|
hooks_code = hooks_to_string({
|
||||||
|
"on_page_context_created": on_page_context_created,
|
||||||
|
"before_goto": before_goto
|
||||||
|
})
|
||||||
|
|
||||||
|
# Use with REST API
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://httpbin.org/html"],
|
||||||
|
"hooks": {"code": hooks_code, "timeout": 30}
|
||||||
|
}
|
||||||
|
response = requests.post("http://localhost:11235/crawl", json=payload)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Option 2: Docker Client with Automatic Conversion (Recommended!)**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
# Define hooks as functions (same as above)
|
||||||
|
async def on_page_context_created(page, context, **kwargs):
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
return page
|
||||||
|
|
||||||
|
async def before_retrieve_html(page, context, **kwargs):
|
||||||
|
# Scroll to load lazy content
|
||||||
|
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||||
|
await page.wait_for_timeout(1000)
|
||||||
|
return page
|
||||||
|
|
||||||
|
# Use Docker client - conversion happens automatically!
|
||||||
|
client = Crawl4aiDockerClient(base_url="http://localhost:11235")
|
||||||
|
|
||||||
|
results = await client.crawl(
|
||||||
|
urls=["https://httpbin.org/html"],
|
||||||
|
hooks={
|
||||||
|
"on_page_context_created": on_page_context_created,
|
||||||
|
"before_retrieve_html": before_retrieve_html
|
||||||
|
},
|
||||||
|
hooks_timeout=30
|
||||||
|
)
|
||||||
|
|
||||||
|
if results and results.success:
|
||||||
|
print(f"✅ Hooks executed! HTML length: {len(results.html)}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits of Function-Based Hooks:**
|
||||||
|
- ✅ Full IDE support (autocomplete, syntax highlighting)
|
||||||
|
- ✅ Type checking and linting
|
||||||
|
- ✅ Easier to test and debug
|
||||||
|
- ✅ Reusable across projects
|
||||||
|
- ✅ Automatic conversion in Docker client
|
||||||
|
- ✅ No breaking changes - string hooks still work!
|
||||||
|
|
||||||
|
## 🤖 Enhanced LLM Integration
|
||||||
|
|
||||||
|
Enhanced LLM integration with custom providers, temperature control, and base URL configuration.
|
||||||
|
|
||||||
|
### Multi-Provider Support
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
|
||||||
|
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||||
|
|
||||||
|
# Test with different providers
|
||||||
|
async def test_llm_providers():
|
||||||
|
# Gemini provider with custom temperature
|
||||||
|
openai_strategy = LLMExtractionStrategy(
|
||||||
|
provider="gemini/gemini-2.5-flash-lite",
|
||||||
|
api_token="your-api-token",
|
||||||
|
temperature=0.7, # New in v0.7.5
|
||||||
|
instruction="Summarize this page in one sentence"
|
||||||
|
)
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
"https://example.com",
|
||||||
|
config=CrawlerRunConfig(extraction_strategy=openai_strategy)
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.success:
|
||||||
|
print("✅ LLM extraction completed")
|
||||||
|
print(result.extracted_content)
|
||||||
|
|
||||||
|
# Docker API with enhanced LLM config
|
||||||
|
llm_payload = {
|
||||||
|
"url": "https://example.com",
|
||||||
|
"f": "llm",
|
||||||
|
"q": "Summarize this page in one sentence.",
|
||||||
|
"provider": "gemini/gemini-2.5-flash-lite",
|
||||||
|
"temperature": 0.7
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post("http://localhost:11235/md", json=llm_payload)
|
||||||
|
```
|
||||||
|
|
||||||
|
**New Features:**
|
||||||
|
- Custom `temperature` parameter for creativity control
|
||||||
|
- `base_url` for custom API endpoints
|
||||||
|
- Multi-provider environment variable support
|
||||||
|
- Docker API integration
|
||||||
|
|
||||||
|
## 🔒 HTTPS Preservation
|
||||||
|
|
||||||
|
**The Problem:** Modern web apps require HTTPS everywhere. When crawlers downgrade internal links from HTTPS to HTTP, authentication breaks and security warnings appear.
|
||||||
|
|
||||||
|
**Solution:** HTTPS preservation maintains secure protocols throughout crawling.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, FilterChain, URLPatternFilter, BFSDeepCrawlStrategy
|
||||||
|
|
||||||
|
async def test_https_preservation():
|
||||||
|
# Enable HTTPS preservation
|
||||||
|
url_filter = URLPatternFilter(
|
||||||
|
patterns=["^(https:\/\/)?quotes\.toscrape\.com(\/.*)?$"]
|
||||||
|
)
|
||||||
|
|
||||||
|
config = CrawlerRunConfig(
|
||||||
|
exclude_external_links=True,
|
||||||
|
preserve_https_for_internal_links=True, # New in v0.7.5
|
||||||
|
deep_crawl_strategy=BFSDeepCrawlStrategy(
|
||||||
|
max_depth=2,
|
||||||
|
max_pages=5,
|
||||||
|
filter_chain=FilterChain([url_filter])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
async for result in await crawler.arun(
|
||||||
|
url="https://quotes.toscrape.com",
|
||||||
|
config=config
|
||||||
|
):
|
||||||
|
# All internal links maintain HTTPS
|
||||||
|
internal_links = [link['href'] for link in result.links['internal']]
|
||||||
|
https_links = [link for link in internal_links if link.startswith('https://')]
|
||||||
|
|
||||||
|
print(f"HTTPS links preserved: {len(https_links)}/{len(internal_links)}")
|
||||||
|
for link in https_links[:3]:
|
||||||
|
print(f" → {link}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🛠️ Bug Fixes and Improvements
|
||||||
|
|
||||||
|
### Major Fixes
|
||||||
|
- **URL Processing**: Fixed '+' sign preservation in query parameters (#1332)
|
||||||
|
- **Proxy Configuration**: Enhanced proxy string parsing (old `proxy` parameter deprecated)
|
||||||
|
- **Docker Error Handling**: Comprehensive error messages with status codes
|
||||||
|
- **Memory Management**: Fixed leaks in long-running sessions
|
||||||
|
- **JWT Authentication**: Fixed Docker JWT validation issues (#1442)
|
||||||
|
- **Playwright Stealth**: Fixed stealth features for Playwright integration (#1481)
|
||||||
|
- **API Configuration**: Fixed config handling to prevent overriding user-provided settings (#1505)
|
||||||
|
- **Docker Filter Serialization**: Resolved JSON encoding errors in deep crawl strategy (#1419)
|
||||||
|
- **LLM Provider Support**: Fixed custom LLM provider integration for adaptive crawler (#1291)
|
||||||
|
- **Performance Issues**: Resolved backoff strategy failures and timeout handling (#989)
|
||||||
|
|
||||||
|
### Community-Reported Issues Fixed
|
||||||
|
This release addresses multiple issues reported by the community through GitHub issues and Discord discussions:
|
||||||
|
- Fixed browser configuration reference errors
|
||||||
|
- Resolved dependency conflicts with cssselect
|
||||||
|
- Improved error messaging for failed authentications
|
||||||
|
- Enhanced compatibility with various proxy configurations
|
||||||
|
- Fixed edge cases in URL normalization
|
||||||
|
|
||||||
|
### Configuration Updates
|
||||||
|
```python
|
||||||
|
# Old proxy config (deprecated)
|
||||||
|
# browser_config = BrowserConfig(proxy="http://proxy:8080")
|
||||||
|
|
||||||
|
# New enhanced proxy config
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
proxy_config={
|
||||||
|
"server": "http://proxy:8080",
|
||||||
|
"username": "optional-user",
|
||||||
|
"password": "optional-pass"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔄 Breaking Changes
|
||||||
|
|
||||||
|
1. **Python 3.10+ Required**: Upgrade from Python 3.9
|
||||||
|
2. **Proxy Parameter Deprecated**: Use new `proxy_config` structure
|
||||||
|
3. **New Dependency**: Added `cssselect` for better CSS handling
|
||||||
|
|
||||||
|
## 🚀 Get Started
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install latest version
|
||||||
|
pip install crawl4ai==0.7.5
|
||||||
|
|
||||||
|
# Docker deployment
|
||||||
|
docker pull unclecode/crawl4ai:latest
|
||||||
|
docker run -p 11235:11235 unclecode/crawl4ai:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
**Try the Demo:**
|
||||||
|
```bash
|
||||||
|
# Run working examples
|
||||||
|
python docs/releases_review/demo_v0.7.5.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**Resources:**
|
||||||
|
- 📖 Documentation: [docs.crawl4ai.com](https://docs.crawl4ai.com)
|
||||||
|
- 🐙 GitHub: [github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)
|
||||||
|
- 💬 Discord: [discord.gg/crawl4ai](https://discord.gg/jP8KfhDhyN)
|
||||||
|
- 🐦 Twitter: [@unclecode](https://x.com/unclecode)
|
||||||
|
|
||||||
|
Happy crawling! 🕷️
|
||||||
314
docs/blog/release-v0.7.6.md
Normal file
314
docs/blog/release-v0.7.6.md
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
# Crawl4AI v0.7.6 Release Notes
|
||||||
|
|
||||||
|
*Release Date: October 22, 2025*
|
||||||
|
|
||||||
|
I'm excited to announce Crawl4AI v0.7.6, featuring a complete webhook infrastructure for the Docker job queue API! This release eliminates polling and brings real-time notifications to both crawling and LLM extraction workflows.
|
||||||
|
|
||||||
|
## 🎯 What's New
|
||||||
|
|
||||||
|
### Webhook Support for Docker Job Queue API
|
||||||
|
|
||||||
|
The headline feature of v0.7.6 is comprehensive webhook support for asynchronous job processing. No more constant polling to check if your jobs are done - get instant notifications when they complete!
|
||||||
|
|
||||||
|
**Key Capabilities:**
|
||||||
|
|
||||||
|
- ✅ **Universal Webhook Support**: Both `/crawl/job` and `/llm/job` endpoints now support webhooks
|
||||||
|
- ✅ **Flexible Delivery Modes**: Choose notification-only or include full data in the webhook payload
|
||||||
|
- ✅ **Reliable Delivery**: Exponential backoff retry mechanism (up to 5 attempts by default, waiting 1s → 2s → 4s → 8s between attempts)
|
||||||
|
- ✅ **Custom Authentication**: Add custom headers for webhook authentication
|
||||||
|
- ✅ **Global Configuration**: Set default webhook URL in `config.yml` for all jobs
|
||||||
|
- ✅ **Task Type Identification**: Distinguish between `crawl` and `llm_extraction` tasks
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
Instead of constantly checking job status:
|
||||||
|
|
||||||
|
**OLD WAY (Polling):**
|
||||||
|
```python
|
||||||
|
# Submit job
|
||||||
|
response = requests.post("http://localhost:11235/crawl/job", json=payload)
|
||||||
|
task_id = response.json()['task_id']
|
||||||
|
|
||||||
|
# Poll until complete
|
||||||
|
while True:
|
||||||
|
status = requests.get(f"http://localhost:11235/crawl/job/{task_id}")
|
||||||
|
if status.json()['status'] == 'completed':
|
||||||
|
break
|
||||||
|
time.sleep(5) # Wait and try again
|
||||||
|
```
|
||||||
|
|
||||||
|
**NEW WAY (Webhooks):**
|
||||||
|
```python
|
||||||
|
# Submit job with webhook
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhook",
|
||||||
|
"webhook_data_in_payload": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response = requests.post("http://localhost:11235/crawl/job", json=payload)
|
||||||
|
|
||||||
|
# Done! Webhook will notify you when complete
|
||||||
|
# Your webhook handler receives the results automatically
|
||||||
|
```
|
||||||
|
|
||||||
|
### Crawl Job Webhooks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"browser_config": {"headless": true},
|
||||||
|
"crawler_config": {"cache_mode": "bypass"},
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": false,
|
||||||
|
"webhook_headers": {
|
||||||
|
"X-Webhook-Secret": "your-secret-token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### LLM Extraction Job Webhooks (NEW!)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/llm/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/article",
|
||||||
|
"q": "Extract the article title, author, and publication date",
|
||||||
|
"schema": "{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"}}}",
|
||||||
|
"provider": "openai/gpt-4o-mini",
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/llm-complete",
|
||||||
|
"webhook_data_in_payload": true
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Webhook Payload Structure
|
||||||
|
|
||||||
|
**Success (with data):**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "llm_1698765432",
|
||||||
|
"task_type": "llm_extraction",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-22T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com/article"],
|
||||||
|
"data": {
|
||||||
|
"extracted_content": {
|
||||||
|
"title": "Understanding Web Scraping",
|
||||||
|
"author": "John Doe",
|
||||||
|
"date": "2025-10-22"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Failure:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_abc123",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "failed",
|
||||||
|
"timestamp": "2025-10-22T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"error": "Connection timeout after 30s"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Simple Webhook Handler Example
|
||||||
|
|
||||||
|
```python
|
||||||
|
import requests
from flask import Flask, request, jsonify

app = Flask(__name__)


@app.route('/webhook', methods=['POST'])
def handle_webhook():
    """Receive a Crawl4AI job webhook and react to its status.

    Returns a 200 acknowledgement for well-formed notifications and a 400
    for bodies that are not a JSON object carrying a ``task_id``.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'task_id' not in payload:
        return jsonify({"status": "invalid payload"}), 400

    task_id = payload['task_id']
    task_type = payload.get('task_type')
    status = payload.get('status')

    if status == 'completed':
        if 'data' in payload:
            # Process data directly
            data = payload['data']
        else:
            # Fetch from API (notification-only webhooks carry no data)
            endpoint = 'crawl' if task_type == 'crawl' else 'llm'
            response = requests.get(
                f'http://localhost:11235/{endpoint}/job/{task_id}',
                timeout=30,  # never let a slow API call hang the handler
            )
            data = response.json()

        # Your business logic here
        print(f"Job {task_id} completed!")

    elif status == 'failed':
        error = payload.get('error', 'Unknown error')
        print(f"Job {task_id} failed: {error}")

    return jsonify({"status": "received"}), 200


if __name__ == '__main__':
    app.run(port=8080)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📊 Performance Improvements
|
||||||
|
|
||||||
|
- **Reduced Server Load**: Eliminates constant polling requests
|
||||||
|
- **Lower Latency**: Instant notification vs. polling interval delay
|
||||||
|
- **Better Resource Usage**: Frees up client connections while jobs run in background
|
||||||
|
- **Scalable Architecture**: Handles high-volume crawling workflows efficiently
|
||||||
|
|
||||||
|
## 🐛 Bug Fixes
|
||||||
|
|
||||||
|
- Fixed webhook configuration serialization for Pydantic HttpUrl fields
|
||||||
|
- Improved error handling in webhook delivery service
|
||||||
|
- Enhanced Redis task storage for webhook config persistence
|
||||||
|
|
||||||
|
## 🌍 Expected Real-World Impact
|
||||||
|
|
||||||
|
### For Web Scraping Workflows
|
||||||
|
- **Reduced Costs**: Fewer API calls mean lower bandwidth and server costs
|
||||||
|
- **Better UX**: Instant notifications improve user experience
|
||||||
|
- **Scalability**: Handle hundreds of concurrent jobs without polling overhead
|
||||||
|
|
||||||
|
### For LLM Extraction Pipelines
|
||||||
|
- **Async Processing**: Submit LLM extraction jobs and move on
|
||||||
|
- **Batch Processing**: Queue multiple extractions, get notified as they complete
|
||||||
|
- **Integration**: Easy integration with workflow automation tools (Zapier, n8n, etc.)
|
||||||
|
|
||||||
|
### For Microservices
|
||||||
|
- **Event-Driven**: Perfect for event-driven microservice architectures
|
||||||
|
- **Decoupling**: Decouple job submission from result processing
|
||||||
|
- **Reliability**: Automatic retries ensure webhooks are delivered
|
||||||
|
|
||||||
|
## 🔄 Breaking Changes
|
||||||
|
|
||||||
|
**None!** This release is fully backward compatible.
|
||||||
|
|
||||||
|
- Webhook configuration is optional
|
||||||
|
- Existing code continues to work without modification
|
||||||
|
- Polling is still supported for jobs without webhook config
|
||||||
|
|
||||||
|
## 📚 Documentation
|
||||||
|
|
||||||
|
### New Documentation
|
||||||
|
- **[WEBHOOK_EXAMPLES.md](../deploy/docker/WEBHOOK_EXAMPLES.md)** - Comprehensive webhook usage guide
|
||||||
|
- **[docker_webhook_example.py](../docs/examples/docker_webhook_example.py)** - Working code examples
|
||||||
|
|
||||||
|
### Updated Documentation
|
||||||
|
- **[Docker README](../deploy/docker/README.md)** - Added webhook sections
|
||||||
|
- API documentation with webhook examples
|
||||||
|
|
||||||
|
## 🛠️ Migration Guide
|
||||||
|
|
||||||
|
No migration needed! Webhooks are opt-in:
|
||||||
|
|
||||||
|
1. **To use webhooks**: Add `webhook_config` to your job payload
|
||||||
|
2. **To keep polling**: Continue using your existing code
|
||||||
|
|
||||||
|
### Quick Start
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Just add webhook_config to your existing payload
|
||||||
|
payload = {
|
||||||
|
# Your existing configuration
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"browser_config": {...},
|
||||||
|
"crawler_config": {...},
|
||||||
|
|
||||||
|
# NEW: Add webhook configuration
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhook",
|
||||||
|
"webhook_data_in_payload": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔧 Configuration
|
||||||
|
|
||||||
|
### Global Webhook Configuration (config.yml)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
webhooks:
|
||||||
|
enabled: true
|
||||||
|
default_url: "https://myapp.com/webhooks/default" # Optional
|
||||||
|
data_in_payload: false
|
||||||
|
retry:
|
||||||
|
max_attempts: 5
|
||||||
|
initial_delay_ms: 1000
|
||||||
|
max_delay_ms: 32000
|
||||||
|
timeout_ms: 30000
|
||||||
|
headers:
|
||||||
|
User-Agent: "Crawl4AI-Webhook/1.0"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🚀 Upgrade Instructions
|
||||||
|
|
||||||
|
### Docker
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Pull the latest image
|
||||||
|
docker pull unclecode/crawl4ai:0.7.6
|
||||||
|
|
||||||
|
# Or use latest tag
|
||||||
|
docker pull unclecode/crawl4ai:latest
|
||||||
|
|
||||||
|
# Run with webhook support
|
||||||
|
docker run -d \
|
||||||
|
-p 11235:11235 \
|
||||||
|
--env-file .llm.env \
|
||||||
|
--name crawl4ai \
|
||||||
|
unclecode/crawl4ai:0.7.6
|
||||||
|
```
|
||||||
|
|
||||||
|
### Python Package
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install --upgrade crawl4ai
|
||||||
|
```
|
||||||
|
|
||||||
|
## 💡 Pro Tips
|
||||||
|
|
||||||
|
1. **Use notification-only mode** for large results - fetch data separately to avoid large webhook payloads
|
||||||
|
2. **Set custom headers** for webhook authentication and request tracking
|
||||||
|
3. **Configure global default webhook** for consistent handling across all jobs
|
||||||
|
4. **Implement idempotent webhook handlers** - the same webhook may be delivered more than once when a delivery is retried
|
||||||
|
5. **Use structured schemas** with LLM extraction for predictable webhook data
|
||||||
|
|
||||||
|
## 🎬 Demo
|
||||||
|
|
||||||
|
Try the release demo:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python docs/releases_review/demo_v0.7.6.py
|
||||||
|
```
|
||||||
|
|
||||||
|
This comprehensive demo showcases:
|
||||||
|
- Crawl job webhooks (notification-only and with data)
|
||||||
|
- LLM extraction webhooks (with JSON schema support)
|
||||||
|
- Custom headers for authentication
|
||||||
|
- Webhook retry mechanism
|
||||||
|
- Real-time webhook receiver
|
||||||
|
|
||||||
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
|
Thank you to the community for the feedback that shaped this feature! Special thanks to everyone who requested webhook support for asynchronous job processing.
|
||||||
|
|
||||||
|
## 📞 Support
|
||||||
|
|
||||||
|
- **Documentation**: https://docs.crawl4ai.com
|
||||||
|
- **GitHub Issues**: https://github.com/unclecode/crawl4ai/issues
|
||||||
|
- **Discord**: https://discord.gg/crawl4ai
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Happy crawling with webhooks!** 🕷️🪝
|
||||||
|
|
||||||
|
*- unclecode*
|
||||||
522
docs/examples/docker_client_hooks_example.py
Normal file
522
docs/examples/docker_client_hooks_example.py
Normal file
@@ -0,0 +1,522 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Comprehensive hooks examples using Docker Client with function objects.
|
||||||
|
|
||||||
|
This approach is recommended because:
|
||||||
|
- Write hooks as regular Python functions
|
||||||
|
- Full IDE support (autocomplete, type checking)
|
||||||
|
- Automatic conversion to API format
|
||||||
|
- Reusable and testable code
|
||||||
|
- Clean, readable syntax
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from crawl4ai import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
# Port published by the Crawl4AI Docker container (docker run -p 11235:11235).
API_BASE_URL = "http://localhost:11235"
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Hook Function Definitions
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# --- All Hooks Demo ---
|
||||||
|
async def browser_created_hook(browser, **kwargs):
    """Log that the browser exists and hand it back unchanged."""
    message = "[HOOK] Browser created and ready"
    print(message)
    return browser
|
||||||
|
|
||||||
|
|
||||||
|
async def page_context_hook(page, context, **kwargs):
    """Prepare a new page: viewport size, a test cookie, and request blocking."""
    print("[HOOK] Setting up page environment")

    def _abort(route):
        # Same effect as the inline lambda: cancel the matched request.
        return route.abort()

    # Viewport
    viewport = {"width": 1920, "height": 1080}
    await page.set_viewport_size(viewport)

    # Cookies
    session_cookie = {
        "name": "test_session",
        "value": "abc123xyz",
        "domain": ".httpbin.org",
        "path": "/",
    }
    await context.add_cookies([session_cookie])

    # Blocked resources: images first, then analytics endpoints
    for pattern in ("**/*.{png,jpg,jpeg,gif}", "**/analytics/*"):
        await context.route(pattern, _abort)

    print("[HOOK] Environment configured")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def user_agent_hook(page, context, user_agent, **kwargs):
    """Log the first 50 characters of the new user agent, then return the page."""
    preview = user_agent[:50]
    print(f"[HOOK] User agent: {preview}...")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def before_goto_hook(page, context, url, **kwargs):
    """Log the target URL and attach custom request headers before navigation."""
    print(f"[HOOK] Navigating to: {url}")

    extra_headers = {
        "X-Custom-Header": "crawl4ai-test",
        "Accept-Language": "en-US",
    }
    await page.set_extra_http_headers(extra_headers)

    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def after_goto_hook(page, context, url, response, **kwargs):
    """Called after the page loads.

    Pauses for one second, then waits up to two seconds for ``<body>``.
    The page is returned whether or not the wait succeeds.
    """
    print(f"[HOOK] Page loaded: {url}")

    await page.wait_for_timeout(1000)

    try:
        await page.wait_for_selector("body", timeout=2000)
        print("[HOOK] Body element ready")
    except Exception:
        # Best-effort wait: a failure here must not abort the crawl. Catching
        # Exception (not a bare except) lets asyncio.CancelledError and
        # KeyboardInterrupt propagate, so the hook can still be cancelled.
        print("[HOOK] Timeout, continuing")

    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def execution_started_hook(page, context, **kwargs):
    """Log the start of custom JS execution and emit a marker in the page console."""
    print("[HOOK] JS execution started")
    marker_script = "console.log('[HOOK] Custom JS');"
    await page.evaluate(marker_script)
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def before_retrieve_hook(page, context, **kwargs):
    """Scroll to the bottom and back to the top before the HTML is read."""
    print("[HOOK] Preparing HTML retrieval")

    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
    scroll_to_top = "window.scrollTo(0, 0);"

    # Down (to trigger lazy content), short pause, then back up
    await page.evaluate(scroll_to_bottom)
    await page.wait_for_timeout(500)
    await page.evaluate(scroll_to_top)

    print("[HOOK] Scrolling complete")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def before_return_hook(page, context, html, **kwargs):
    """Report the HTML length and a few DOM element counts before returning."""
    html_length = len(html)
    print(f"[HOOK] HTML ready: {html_length} chars")

    count_script = '''() => ({
        images: document.images.length,
        links: document.links.length,
        scripts: document.scripts.length
    })'''
    metrics = await page.evaluate(count_script)

    image_count = metrics['images']
    link_count = metrics['links']
    print(f"[HOOK] Metrics - Images: {image_count}, Links: {link_count}")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
# --- Authentication Hooks ---
|
||||||
|
async def auth_context_hook(page, context, **kwargs):
    """Seed the context with an auth cookie and the page with localStorage values."""
    print("[HOOK] Setting up authentication")

    # Auth cookie
    auth_cookie = {
        "name": "auth_token",
        "value": "fake_jwt_token",
        "domain": ".httpbin.org",
        "path": "/",
        "httpOnly": True,
    }
    await context.add_cookies([auth_cookie])

    # localStorage entries
    storage_script = '''
        localStorage.setItem('user_id', '12345');
        localStorage.setItem('auth_time', new Date().toISOString());
    '''
    await page.evaluate(storage_script)

    print("[HOOK] Auth context ready")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def auth_headers_hook(page, context, url, **kwargs):
    """Attach HTTP Basic credentials and an API key header before navigation."""
    print(f"[HOOK] Adding auth headers for {url}")

    # NOTE(review): the import stays function-local, presumably so the hook
    # remains self-contained when converted to API format - confirm.
    import base64

    raw_credentials = b"user:passwd"
    credentials = base64.b64encode(raw_credentials).decode('ascii')

    auth_headers = {
        'Authorization': f'Basic {credentials}',
        'X-API-Key': 'test-key-123',
    }
    await page.set_extra_http_headers(auth_headers)

    return page
|
||||||
|
|
||||||
|
|
||||||
|
# --- Performance Optimization Hooks ---
|
||||||
|
async def performance_hook(page, context, **kwargs):
    """Abort heavy or tracking requests and disable CSS animations."""
    print("[HOOK] Optimizing for performance")

    def _abort(r):
        return r.abort()

    # Resource-heavy content and trackers, registered in this order
    blocked_patterns = (
        "**/*.{png,jpg,jpeg,gif,webp,svg}",
        "**/*.{woff,woff2,ttf}",
        "**/*.{mp4,webm,ogg}",
        "**/googletagmanager.com/*",
        "**/google-analytics.com/*",
        "**/facebook.com/*",
    )
    for pattern in blocked_patterns:
        await context.route(pattern, _abort)

    # Animations off
    no_animation_css = '''
        *, *::before, *::after {
            animation-duration: 0s !important;
            transition-duration: 0s !important;
        }
    '''
    await page.add_style_tag(content=no_animation_css)

    print("[HOOK] Optimizations applied")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_hook(page, context, **kwargs):
    """Strip ads, overlays, scripts and styles from the DOM before extraction."""
    print("[HOOK] Cleaning page")

    removal_script = '''() => {
        const selectors = [
            '.ad', '.ads', '.advertisement',
            '.popup', '.modal', '.overlay',
            '.cookie-banner', '.newsletter'
        ];

        selectors.forEach(sel => {
            document.querySelectorAll(sel).forEach(el => el.remove());
        });

        document.querySelectorAll('script, style').forEach(el => el.remove());
    }'''
    await page.evaluate(removal_script)

    print("[HOOK] Page cleaned")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
# --- Content Extraction Hooks ---
|
||||||
|
async def wait_dynamic_content_hook(page, context, url, response, **kwargs):
    """Wait for dynamic content and click a "Load More" control if one exists.

    The click is best-effort: any failure is reported and the page is
    returned regardless.
    """
    print(f"[HOOK] Waiting for dynamic content on {url}")

    await page.wait_for_timeout(2000)

    # Click "Load More" if exists
    try:
        load_more = await page.query_selector('[class*="load-more"], button:has-text("Load More")')
        if load_more:
            await load_more.click()
            await page.wait_for_timeout(1000)
            print("[HOOK] Clicked 'Load More'")
    except Exception as exc:
        # Report instead of silently swallowing. Exception (not a bare except)
        # keeps asyncio.CancelledError and KeyboardInterrupt propagating.
        print(f"[HOOK] 'Load More' step skipped: {exc}")

    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_metadata_hook(page, context, **kwargs):
    """Print title/description/author/keywords, then scroll to the bottom three times."""
    print("[HOOK] Extracting metadata")

    metadata_script = '''() => {
        const getMeta = (name) => {
            const el = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
            return el ? el.getAttribute('content') : null;
        };

        return {
            title: document.title,
            description: getMeta('description'),
            author: getMeta('author'),
            keywords: getMeta('keywords'),
        };
    }'''
    metadata = await page.evaluate(metadata_script)

    print(f"[HOOK] Metadata: {metadata}")

    # Infinite scroll: three passes, one second apart
    scroll_script = "window.scrollTo(0, document.body.scrollHeight);"
    for attempt in range(1, 4):
        await page.evaluate(scroll_script)
        await page.wait_for_timeout(1000)
        print(f"[HOOK] Scroll {attempt}/3")

    return page
|
||||||
|
|
||||||
|
|
||||||
|
# --- Multi-URL Hooks ---
|
||||||
|
async def url_specific_hook(page, context, url, **kwargs):
    """Set an ``X-Type`` header when the URL contains 'html' or 'json'."""
    print(f"[HOOK] Processing URL: {url}")

    # 'html' takes precedence over 'json'; other URLs get no extra header
    content_kind = None
    if 'html' in url:
        content_kind = "HTML"
    elif 'json' in url:
        content_kind = "JSON"

    if content_kind is not None:
        await page.set_extra_http_headers({"X-Type": content_kind})

    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def track_progress_hook(page, context, url, response, **kwargs):
    """Log the loaded URL with its response status ('unknown' without a response)."""
    if response:
        status = response.status
    else:
        status = 'unknown'
    print(f"[HOOK] Loaded {url} - Status: {status}")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Test Functions
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def test_all_hooks_comprehensive():
    """Crawl one page with a function attached to each of the 8 hook points."""
    divider = "=" * 70
    print(divider)
    print("Test 1: All Hooks Comprehensive Demo (Docker Client)")
    print(divider)

    # Hook point name -> plain function object
    hooks = dict(
        on_browser_created=browser_created_hook,
        on_page_context_created=page_context_hook,
        on_user_agent_updated=user_agent_hook,
        before_goto=before_goto_hook,
        after_goto=after_goto_hook,
        on_execution_started=execution_started_hook,
        before_retrieve_html=before_retrieve_hook,
        before_return_html=before_return_hook,
    )
    target_urls = ["https://httpbin.org/html"]

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nCrawling with all 8 hooks...")

        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=30)

        print("\n✅ Success!")
        print(f" URL: {result.url}")
        print(f" Success: {result.success}")
        print(f" HTML: {len(result.html)} chars")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_authentication_workflow():
    """Crawl httpbin's basic-auth endpoint using the two authentication hooks."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 2: Authentication Workflow (Docker Client)")
    print(divider)

    hooks = dict(
        on_page_context_created=auth_context_hook,
        before_goto=auth_headers_hook,
    )
    target_urls = ["https://httpbin.org/basic-auth/user/passwd"]

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nTesting authentication...")

        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=15)

        print("\n✅ Authentication completed")

        # Failure first; otherwise look for the "authenticated" marker in the body
        if not result.success:
            print(f" ❌ Failed: {result.error_message}")
        elif '"authenticated"' in result.html and 'true' in result.html:
            print(" ✅ Basic auth successful!")
        else:
            print(" ⚠️ Auth status unclear")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_performance_optimization():
    """Crawl one page with the resource-blocking and cleanup hooks attached."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 3: Performance Optimization (Docker Client)")
    print(divider)

    hooks = dict(
        on_page_context_created=performance_hook,
        before_retrieve_html=cleanup_hook,
    )
    target_urls = ["https://httpbin.org/html"]

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nTesting performance hooks...")

        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=10)

        html_size = len(result.html)
        print("\n✅ Optimization completed")
        print(f" HTML size: {html_size:,} chars")
        print(" Resources blocked, ads removed")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_content_extraction():
    """Crawl one site with the dynamic-content and metadata hooks attached."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 4: Content Extraction (Docker Client)")
    print(divider)

    hooks = dict(
        after_goto=wait_dynamic_content_hook,
        before_retrieve_html=extract_metadata_hook,
    )
    target_urls = ["https://www.kidocode.com/"]

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nTesting extraction hooks...")

        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=20)

        print("\n✅ Extraction completed")
        print(f" URL: {result.url}")
        print(f" Success: {result.success}")
        print(f" Metadata: {result.metadata}")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_multi_url_crawl():
    """Crawl three httpbin URLs in one call and print a per-URL status line."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 5: Multi-URL Crawl (Docker Client)")
    print(divider)

    hooks = dict(
        before_goto=url_specific_hook,
        after_goto=track_progress_hook,
    )
    target_urls = [
        "https://httpbin.org/html",
        "https://httpbin.org/json",
        "https://httpbin.org/xml",
    ]

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nCrawling multiple URLs...")

        results = await client.crawl(target_urls, hooks=hooks, hooks_timeout=15)

        print("\n✅ Multi-URL crawl completed")
        print(f"\n Crawled {len(results)} URLs:")
        for position, item in enumerate(results, 1):
            if item.success:
                status = "✅"
            else:
                status = "❌"
            print(f" {status} {position}. {item.url}")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_reusable_hook_library():
    """Crawl one site using hooks taken from a small locally defined library."""
    divider = "=" * 70
    print("\n" + divider)
    print("Test 6: Reusable Hook Library (Docker Client)")
    print(divider)

    # A library of reusable hooks, grouped as static methods
    class HookLibrary:
        @staticmethod
        async def block_images(page, context, **kwargs):
            """Abort requests for common image formats."""
            image_pattern = "**/*.{png,jpg,jpeg,gif}"
            await context.route(image_pattern, lambda r: r.abort())
            print("[LIBRARY] Images blocked")
            return page

        @staticmethod
        async def block_analytics(page, context, **kwargs):
            """Abort requests to analytics endpoints."""
            for pattern in ("**/analytics/*", "**/google-analytics.com/*"):
                await context.route(pattern, lambda r: r.abort())
            print("[LIBRARY] Analytics blocked")
            return page

        @staticmethod
        async def scroll_infinite(page, context, **kwargs):
            """Scroll to the bottom up to 5 times, stopping once the height is stable."""
            for _ in range(5):
                height_before = await page.evaluate("document.body.scrollHeight")
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
                await page.wait_for_timeout(1000)
                height_after = await page.evaluate("document.body.scrollHeight")
                if height_after == height_before:
                    # Nothing new was loaded by the last scroll
                    break
            print("[LIBRARY] Infinite scroll complete")
            return page

    hooks = dict(
        on_page_context_created=HookLibrary.block_images,
        before_retrieve_html=HookLibrary.scroll_infinite,
    )
    target_urls = ["https://www.kidocode.com/"]

    async with Crawl4aiDockerClient(base_url=API_BASE_URL, verbose=False) as client:
        print("\nUsing hook library...")

        result = await client.crawl(target_urls, hooks=hooks, hooks_timeout=20)

        print("\n✅ Library hooks completed")
        print(f" Success: {result.success}")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Main
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def main():
    """Run every Docker client hook example in order and print a summary."""
    import traceback

    divider = "=" * 70
    print("🔧 Crawl4AI Docker Client - Hooks Examples (Function-Based)")
    print("Using Python function objects with automatic conversion")
    print(divider)

    tests = [
        ("All Hooks Demo", test_all_hooks_comprehensive),
        ("Authentication", test_authentication_workflow),
        ("Performance", test_performance_optimization),
        ("Extraction", test_content_extraction),
        ("Multi-URL", test_multi_url_crawl),
        ("Hook Library", test_reusable_hook_library),
    ]
    total = len(tests)

    # A failing example is reported with its traceback; the rest still run.
    for position, (name, test_func) in enumerate(tests, 1):
        try:
            await test_func()
            print(f"\n✅ Test {position}/{total}: {name} completed\n")
        except Exception as e:
            print(f"\n❌ Test {position}/{total}: {name} failed: {e}\n")
            traceback.print_exc()

    print(divider)
    print("🎉 All Docker client hook examples completed!")
    print("\n💡 Key Benefits of Function-Based Hooks:")
    benefit_lines = (
        " • Write as regular Python functions",
        " • Full IDE support (autocomplete, types)",
        " • Automatic conversion to API format",
        " • Reusable across projects",
        " • Clean, readable code",
        " • Easy to test and debug",
    )
    for line in benefit_lines:
        print(line)
    print(divider)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
627
docs/examples/docker_hooks_examples.py
Normal file
627
docs/examples/docker_hooks_examples.py
Normal file
@@ -0,0 +1,627 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
🚀 Crawl4AI Docker Hooks System - Complete Examples
|
||||||
|
====================================================
|
||||||
|
|
||||||
|
This file demonstrates the Docker Hooks System with three different approaches:
|
||||||
|
|
||||||
|
1. String-based hooks for REST API
|
||||||
|
2. hooks_to_string() utility to convert functions
|
||||||
|
3. Docker Client with automatic conversion (most convenient)
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Docker container running: docker run -p 11235:11235 unclecode/crawl4ai:latest
|
||||||
|
- crawl4ai installed: pip install crawl4ai
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
# Import Crawl4AI components
|
||||||
|
from crawl4ai import hooks_to_string
|
||||||
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
DOCKER_URL = "http://localhost:11235"
|
||||||
|
TEST_URLS = [
|
||||||
|
"https://www.kidocode.com",
|
||||||
|
"https://quotes.toscrape.com",
|
||||||
|
"https://httpbin.org/html",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def print_section(title: str, description: str = ""):
    """Print a banner: rule, title, optional description, rule."""
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    if description:
        print(f" {description}")
    print(rule + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def check_docker_service() -> bool:
    """Check if the Docker service is running.

    Returns:
        True when ``GET {DOCKER_URL}/health`` answers with HTTP 200 within
        3 seconds; False for any other status or any request failure.
    """
    try:
        response = requests.get(f"{DOCKER_URL}/health", timeout=3)
    except requests.RequestException:
        # Connection refused, timeout, etc. Unrelated errors and
        # KeyboardInterrupt are no longer swallowed by a bare except.
        return False
    return response.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# REUSABLE HOOK LIBRARY
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def performance_optimization_hook(page, context, **kwargs):
    """
    Performance Hook: abort image, ad and analytics requests to speed up crawling
    """
    print(" [Hook] 🚀 Optimizing performance - blocking images and ads...")

    def _abort(route):
        return route.abort()

    # Images first, then ads and analytics, in the original registration order
    blocked_patterns = (
        "**/*.{png,jpg,jpeg,gif,webp,svg,ico}",
        "**/analytics/*",
        "**/ads/*",
        "**/google-analytics.com/*",
    )
    for pattern in blocked_patterns:
        await context.route(pattern, _abort)

    print(" [Hook] ✓ Performance optimization applied")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def viewport_setup_hook(page, context, **kwargs):
    """
    Viewport Hook: give the page a fixed 1920x1080 viewport
    """
    print(" [Hook] 🖥️ Setting viewport to 1920x1080...")
    viewport = {"width": 1920, "height": 1080}
    await page.set_viewport_size(viewport)
    print(" [Hook] ✓ Viewport configured")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def authentication_headers_hook(page, context, url, **kwargs):
    """
    Headers Hook: attach custom identification and language headers
    """
    url_preview = url[:50]
    print(f" [Hook] 🔐 Adding custom headers for {url_preview}...")

    custom_headers = {
        'X-Crawl4AI': 'docker-hooks',
        'X-Custom-Hook': 'function-based',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    await page.set_extra_http_headers(custom_headers)

    print(" [Hook] ✓ Custom headers added")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def lazy_loading_handler_hook(page, context, **kwargs):
    """
    Content Hook: scroll bottom -> middle -> top to trigger lazy-loaded content
    """
    print(" [Hook] 📜 Scrolling to load lazy content...")

    # (scroll script, pause in ms afterwards)
    scroll_steps = (
        ("window.scrollTo(0, document.body.scrollHeight)", 1000),      # bottom
        ("window.scrollTo(0, document.body.scrollHeight / 2)", 500),   # middle
        ("window.scrollTo(0, 0)", 500),                                # top
    )
    for script, pause_ms in scroll_steps:
        await page.evaluate(script)
        await page.wait_for_timeout(pause_ms)

    print(" [Hook] ✓ Lazy content loaded")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def page_analytics_hook(page, context, **kwargs):
    """
    Analytics Hook: print the page title and element counts before extraction
    """
    print(" [Hook] 📊 Collecting page analytics...")

    metrics_script = '''
        () => ({
            title: document.title,
            images: document.images.length,
            links: document.links.length,
            scripts: document.scripts.length,
            headings: document.querySelectorAll('h1, h2, h3').length,
            paragraphs: document.querySelectorAll('p').length
        })
    '''
    metrics = await page.evaluate(metrics_script)

    title_preview = metrics['title'][:50]
    print(f" [Hook] 📈 Page: {title_preview}...")
    summary = (
        f" Links: {metrics['links']}, Images: {metrics['images']}, "
        f"Headings: {metrics['headings']}, Paragraphs: {metrics['paragraphs']}"
    )
    print(summary)

    return page
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# APPROACH 1: String-Based Hooks (REST API)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def example_1_string_based_hooks():
    """
    Demonstrate string-based hooks with the REST API.

    Use this when working with the REST API directly or from non-Python
    clients. Posts a crawl request whose ``hooks.code`` maps hook-point
    names to Python source strings, then prints a summary of the response.
    Request errors are reported on stdout rather than raised.
    """
    print_section(
        "APPROACH 1: String-Based Hooks (REST API)",
        "Define hooks as strings for REST API requests"
    )

    # Define hooks as strings. The hook source starts at column 0 inside each
    # literal so the text sent to the server is validly indented on its own.
    hooks_config = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print(" [String Hook] Setting up page context...")
    # Block images for performance
    await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
    await page.set_viewport_size({"width": 1920, "height": 1080})
    return page
""",

        "before_goto": """
async def hook(page, context, url, **kwargs):
    print(f" [String Hook] Navigating to {url[:50]}...")
    await page.set_extra_http_headers({
        'X-Crawl4AI': 'string-based-hooks',
    })
    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    print(" [String Hook] Scrolling page...")
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(1000)
    return page
"""
    }

    # Prepare request payload
    payload = {
        "urls": [TEST_URLS[2]],  # httpbin.org
        "hooks": {
            "code": hooks_config,
            "timeout": 30
        },
        "crawler_config": {
            "cache_mode": "bypass"
        }
    }

    print(f"🎯 Target URL: {TEST_URLS[2]}")
    print(f"🔧 Configured {len(hooks_config)} string-based hooks")
    print("📡 Sending request to Docker API...\n")

    try:
        start_time = time.time()
        response = requests.post(f"{DOCKER_URL}/crawl", json=payload, timeout=60)
        execution_time = time.time() - start_time

        if response.status_code == 200:
            result = response.json()

            print(f"\n✅ Request successful! (took {execution_time:.2f}s)")

            # Display results
            crawl_results = result.get('results')
            if crawl_results and crawl_results[0].get('success'):
                crawl_result = crawl_results[0]
                # "or ''" guards against a key that is present but null.
                html_length = len(crawl_result.get('html') or '')
                markdown_length = len(crawl_result.get('markdown') or '')

                print("\n📊 Results:")
                print(f" • HTML length: {html_length:,} characters")
                print(f" • Markdown length: {markdown_length:,} characters")
                print(f" • URL: {crawl_result.get('url')}")

                # Check hooks execution. The report is optional, so read it
                # defensively instead of aborting the display on a missing key.
                if 'hooks' in result:
                    hooks_info = result['hooks']
                    hooks_status = hooks_info.get('status') or {}
                    print("\n🎣 Hooks Execution:")
                    print(f" • Status: {hooks_status.get('status', 'unknown')}")
                    print(f" • Attached hooks: {len(hooks_status.get('attached_hooks') or [])}")

                    if 'summary' in hooks_info:
                        summary = hooks_info['summary']
                        print(f" • Total executions: {summary.get('total_executions', 0)}")
                        print(f" • Successful: {summary.get('successful', 0)}")
                        print(f" • Success rate: {summary.get('success_rate', 0.0):.1f}%")
            else:
                print("⚠️ Crawl completed but no results")

        else:
            print(f"❌ Request failed with status {response.status_code}")
            print(f" Error: {response.text[:200]}")

    except requests.exceptions.Timeout:
        print("⏰ Request timed out after 60 seconds")
    except Exception as e:
        # Demo boundary: report any other failure and carry on.
        print(f"❌ Error: {str(e)}")

    print("\n" + "─" * 70)
    print("✓ String-based hooks example complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# APPROACH 2: Function-Based Hooks with hooks_to_string() Utility
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def example_2_hooks_to_string_utility():
    """
    Demonstrate the hooks_to_string() utility for converting functions.

    Use this when you want to write hooks as functions but call the REST
    API. Converts three hook functions with ``hooks_to_string()``, previews
    the first converted hook, posts the converted hooks to the ``/crawl``
    endpoint and prints the outcome. Request errors are reported on stdout
    rather than raised.
    """
    print_section(
        "APPROACH 2: hooks_to_string() Utility",
        "Convert Python functions to strings for REST API"
    )

    print("📦 Creating hook functions...")
    print(" • performance_optimization_hook")
    print(" • authentication_headers_hook")
    print(" • lazy_loading_handler_hook")

    # Convert function objects to strings using the utility
    print("\n🔄 Converting functions to strings with hooks_to_string()...")

    hooks_dict = {
        "on_page_context_created": performance_optimization_hook,
        "before_goto": authentication_headers_hook,
        "before_retrieve_html": lazy_loading_handler_hook,
    }

    hooks_as_strings = hooks_to_string(hooks_dict)

    print(f"✅ Successfully converted {len(hooks_as_strings)} functions to strings")

    # Show a preview
    print("\n📝 Sample converted hook (first 200 characters):")
    print("─" * 70)
    sample_hook = next(iter(hooks_as_strings.values()))
    print(sample_hook[:200] + "...")
    print("─" * 70)

    # Use the converted hooks with REST API
    print("\n📡 Using converted hooks with REST API...")

    payload = {
        "urls": [TEST_URLS[2]],
        "hooks": {
            "code": hooks_as_strings,
            "timeout": 30
        }
    }

    try:
        start_time = time.time()
        response = requests.post(f"{DOCKER_URL}/crawl", json=payload, timeout=60)
        execution_time = time.time() - start_time

        if response.status_code == 200:
            result = response.json()
            print(f"\n✅ Request successful! (took {execution_time:.2f}s)")

            crawl_results = result.get('results')
            if crawl_results and crawl_results[0].get('success'):
                crawl_result = crawl_results[0]
                print(f" • HTML length: {len(crawl_result.get('html') or ''):,} characters")
                print(" • Hooks executed successfully!")
            else:
                # Previously silent: a 200 response whose crawl did not succeed.
                print("⚠️ Crawl completed but no results")
        else:
            print(f"❌ Request failed: {response.status_code}")

    except requests.exceptions.Timeout:
        print("⏰ Request timed out after 60 seconds")
    except Exception as e:
        # Demo boundary: report any other failure and carry on.
        print(f"❌ Error: {str(e)}")

    print("\n💡 Benefits of hooks_to_string():")
    print(" ✓ Write hooks as regular Python functions")
    print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
    print(" ✓ Type checking and linting")
    print(" ✓ Easy to test and debug")
    print(" ✓ Reusable across projects")
    print(" ✓ Works with any REST API client")

    print("\n" + "─" * 70)
    print("✓ hooks_to_string() utility example complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# APPROACH 3: Docker Client with Automatic Conversion (RECOMMENDED)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def example_3_docker_client_auto_conversion():
    """
    Demonstrate Docker Client with automatic hook conversion (RECOMMENDED).

    Use this for the best developer experience with Python. Passes hook
    function objects straight to ``Crawl4aiDockerClient.crawl()`` and prints
    a summary of the returned result. Any exception is reported with a
    traceback instead of being raised.
    """
    print_section(
        "APPROACH 3: Docker Client with Auto-Conversion (RECOMMENDED)",
        "Pass function objects directly - conversion happens automatically!"
    )

    print("🐳 Initializing Crawl4AI Docker Client...")
    client = Crawl4aiDockerClient(base_url=DOCKER_URL)

    print("✅ Client ready!\n")

    # Use our reusable hook library - just pass the function objects!
    print("📚 Using reusable hook library:")
    print(" • performance_optimization_hook")
    print(" • authentication_headers_hook")
    print(" • lazy_loading_handler_hook")
    print(" • page_analytics_hook")

    print("\n🎯 Target URL: " + TEST_URLS[0])
    print("🚀 Starting crawl with automatic hook conversion...\n")

    try:
        start_time = time.time()

        # Pass function objects directly - NO manual conversion needed! ✨
        result = await client.crawl(
            urls=[TEST_URLS[0]],
            hooks={
                "on_page_context_created": performance_optimization_hook,
                "before_goto": authentication_headers_hook,
                "before_retrieve_html": lazy_loading_handler_hook,
                "before_return_html": page_analytics_hook,
            },
            hooks_timeout=30
        )

        execution_time = time.time() - start_time

        print(f"\n✅ Crawl completed! (took {execution_time:.2f}s)\n")

        # Display results. The return value is treated as a single result
        # object (attributes: success, url, html, markdown, metadata, links).
        if result and result.success:
            print("📊 Results:")
            print(f" • URL: {result.url}")
            print(f" • Success: {result.success}")
            print(f" • HTML length: {len(result.html):,} characters")
            print(f" • Markdown length: {len(result.markdown):,} characters")

            # Show metadata
            if result.metadata:
                # "or" covers a title that is present but None, which would
                # otherwise fail on slicing.
                title = result.metadata.get('title') or 'N/A'
                print("\n📋 Metadata:")
                print(f" • Title: {title[:50]}...")

            # Show links
            if result.links:
                internal_count = len(result.links.get('internal', []))
                external_count = len(result.links.get('external', []))
                print("\n🔗 Links Found:")
                print(f" • Internal: {internal_count}")
                print(f" • External: {external_count}")
        else:
            print("⚠️ Crawl completed but no successful results")
            if result:
                print(f" Error: {result.error_message}")

    except Exception as e:
        # Demo boundary: show the failure in full and carry on.
        print(f"❌ Error: {str(e)}")
        import traceback
        traceback.print_exc()

    print("\n🌟 Why Docker Client is RECOMMENDED:")
    print(" ✓ Automatic function-to-string conversion")
    print(" ✓ No manual hooks_to_string() calls needed")
    print(" ✓ Cleaner, more Pythonic code")
    print(" ✓ Full type hints and IDE support")
    print(" ✓ Built-in error handling")
    print(" ✓ Async/await support")

    print("\n" + "─" * 70)
    print("✓ Docker Client auto-conversion example complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# APPROACH 4: Authentication Example
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def example_4_authentication_flow():
    """
    Demonstrate an authentication flow with multiple hooks.

    Sends two string-based hooks to the ``/crawl`` endpoint: one adds an
    auth cookie when the page context is created, the other sets Basic-auth
    and API-key headers before navigation. The target is httpbin's
    basic-auth endpoint; the response HTML is checked for an
    ``"authenticated"`` marker. Request errors are reported on stdout rather
    than raised, matching the other REST examples in this file.
    """
    print_section(
        "EXAMPLE 4: Authentication Flow",
        "Using hooks for authentication with cookies and headers"
    )

    # The hook source starts at column 0 inside each literal so the text sent
    # to the server is validly indented on its own.
    hooks_code = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print("[HOOK] Setting up authentication context")

    # Add authentication cookies
    await context.add_cookies([
        {
            "name": "auth_token",
            "value": "fake_jwt_token_here",
            "domain": ".httpbin.org",
            "path": "/",
            "httpOnly": True,
            "secure": True
        }
    ])

    return page
""",

        "before_goto": """
async def hook(page, context, url, **kwargs):
    print(f"[HOOK] Adding auth headers for {url}")

    # Add Authorization header
    import base64
    credentials = base64.b64encode(b"user:passwd").decode('ascii')

    await page.set_extra_http_headers({
        'Authorization': f'Basic {credentials}',
        'X-API-Key': 'test-api-key-123'
    })

    return page
"""
    }

    payload = {
        "urls": ["https://httpbin.org/basic-auth/user/passwd"],
        "hooks": {
            "code": hooks_code,
            "timeout": 15
        }
    }

    print("\nTesting authentication with httpbin endpoints...")

    try:
        # Without a timeout the request could block indefinitely.
        response = requests.post(f"{DOCKER_URL}/crawl", json=payload, timeout=60)

        if response.status_code == 200:
            data = response.json()
            print("✅ Authentication test completed")

            for i, result in enumerate(data.get('results', [])):
                print(f"\n URL {i+1}: {result.get('url')}")
                if result.get('success'):
                    # Check for authentication success indicators
                    html_content = result.get('html') or ''
                    if '"authenticated"' in html_content and 'true' in html_content:
                        print(" ✅ Authentication successful! Basic auth worked.")
                    else:
                        print(" ⚠️ Page loaded but auth status unclear")
                else:
                    print(f" ❌ Failed: {result.get('error_message', 'Unknown error')}")
        else:
            print(f"❌ Error: {response.status_code}")

    except requests.exceptions.Timeout:
        print("⏰ Request timed out after 60 seconds")
    except Exception as e:
        # Demo boundary: report any other failure and carry on.
        print(f"❌ Error: {str(e)}")

    print("\n" + "─" * 70)
    print("✓ Authentication example complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# MAIN EXECUTION
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def main():
    """
    Run all example demonstrations.

    Checks that the Docker service is reachable, runs each example in order
    (awaiting the async one), pauses for Enter between examples, and prints
    a closing summary. A failing example is reported with a traceback and
    the run continues; Ctrl+C stops the loop.
    """
    print("\n" + "=" * 70)
    print(" 🚀 Crawl4AI - Docker Hooks System Examples")
    print("=" * 70)

    # Check Docker service
    print("\n🔍 Checking Docker service status...")
    if not check_docker_service():
        print("❌ Docker service is not running!")
        print("\n📋 To start the Docker service:")
        print(" docker run -p 11235:11235 unclecode/crawl4ai:latest")
        print("\nPlease start the service and run this example again.")
        return

    print("✅ Docker service is running!\n")

    # Run all examples: (title, callable, whether the callable is a coroutine function)
    examples = [
        ("String-Based Hooks (REST API)", example_1_string_based_hooks, False),
        ("hooks_to_string() Utility", example_2_hooks_to_string_utility, False),
        ("Docker Client Auto-Conversion (Recommended)", example_3_docker_client_auto_conversion, True),
        ("Authentication Flow", example_4_authentication_flow, False),
    ]

    for i, (name, example_func, is_async) in enumerate(examples, 1):
        print(f"\n{'🔷' * 35}")
        print(f"Example {i}/{len(examples)}: {name}")
        print(f"{'🔷' * 35}\n")

        try:
            if is_async:
                await example_func()
            else:
                example_func()

            print(f"✅ Example {i} completed successfully!")

            # Pause between examples (except the last one)
            if i < len(examples):
                print("\n⏸️ Press Enter to continue to next example...")
                try:
                    input()
                except EOFError:
                    # Non-interactive stdin: skip the pause rather than
                    # reporting the finished example as failed.
                    pass

        except KeyboardInterrupt:
            print("\n⏹️ Examples interrupted by user")
            break
        except Exception as e:
            print(f"\n❌ Example {i} failed: {str(e)}")
            import traceback
            traceback.print_exc()
            print("\nContinuing to next example...\n")

    # Final summary
    print("\n" + "=" * 70)
    print(" 🎉 All Examples Complete!")
    print("=" * 70)

    summary_lines = (
        "\n📊 Summary - Three Approaches to Docker Hooks:",

        "\n✨ 1. String-Based Hooks:",
        " • Write hooks as strings directly in JSON",
        " • Best for: REST API, non-Python clients, simple use cases",
        " • Cons: No IDE support, harder to debug",

        "\n✨ 2. hooks_to_string() Utility:",
        " • Write hooks as Python functions, convert to strings",
        " • Best for: Python with REST API, reusable hook libraries",
        " • Pros: IDE support, type checking, easy debugging",

        "\n✨ 3. Docker Client (RECOMMENDED):",
        " • Pass function objects directly, automatic conversion",
        " • Best for: Python applications, best developer experience",
        " • Pros: All benefits of #2 + cleaner code, no manual conversion",

        "\n💡 Recommendation:",
        " Use Docker Client (#3) for Python applications",
        " Use hooks_to_string() (#2) when you need REST API flexibility",
        " Use string-based (#1) for non-Python clients or simple scripts",

        "\n🎯 8 Hook Points Available:",
        " • on_browser_created, on_page_context_created",
        " • on_user_agent_updated, before_goto, after_goto",
        " • on_execution_started, before_retrieve_html, before_return_html",

        "\n📚 Resources:",
        " • Docs: https://docs.crawl4ai.com/core/docker-deployment",
        " • GitHub: https://github.com/unclecode/crawl4ai",
        " • Discord: https://discord.gg/jP8KfhDhyN",
    )
    for line in summary_lines:
        print(line)

    print("\n" + "=" * 70)
    print(" Happy Crawling! 🕷️")
    print("=" * 70 + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the async main() and turn Ctrl+C or an unexpected
# error into a readable message instead of a bare stack trace.
if __name__ == "__main__":
    print("\n🎬 Starting Crawl4AI Docker Hooks Examples...")
    print("Press Ctrl+C anytime to exit\n")

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\n👋 Examples stopped by user. Thanks for exploring Crawl4AI!")
    except Exception as e:
        print(f"\n\n❌ Error: {str(e)}")
        import traceback
        traceback.print_exc()
|
||||||
461
docs/examples/docker_webhook_example.py
Normal file
461
docs/examples/docker_webhook_example.py
Normal file
@@ -0,0 +1,461 @@
|
|||||||
|
"""
|
||||||
|
Docker Webhook Example for Crawl4AI
|
||||||
|
|
||||||
|
This example demonstrates how to use webhooks with the Crawl4AI job queue API.
|
||||||
|
Instead of polling for results, webhooks notify your application when jobs complete.
|
||||||
|
|
||||||
|
Supports both:
|
||||||
|
- /crawl/job - Raw crawling with markdown extraction
|
||||||
|
- /llm/job - LLM-powered content extraction
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
1. Crawl4AI Docker container running on localhost:11235
|
||||||
|
2. Flask installed: pip install flask requests
|
||||||
|
3. LLM API key configured in .llm.env (for LLM extraction examples)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
1. Run this script: python docker_webhook_example.py
|
||||||
|
2. The webhook server will start on http://localhost:8080
|
||||||
|
3. Jobs will be submitted and webhooks will be received automatically
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
# Configuration
# Base URL of the Crawl4AI server that jobs are submitted to.
CRAWL4AI_BASE_URL = "http://localhost:11235"
# NOTE(review): the Crawl4AI server calls this URL back. If the server runs
# inside a Docker container, "localhost" may resolve to the container rather
# than to this machine -- confirm for your network setup.
WEBHOOK_BASE_URL = "http://localhost:8080"  # Your webhook receiver URL

# Initialize Flask app for webhook receiver
app = Flask(__name__)

# Store received webhook data for demonstration
received_webhooks = []
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/webhooks/crawl-complete', methods=['POST'])
def handle_crawl_webhook():
    """
    Webhook handler that receives notifications when crawl jobs complete.

    Payload structure:
    {
        "task_id": "crawl_abc123",
        "task_type": "crawl",
        "status": "completed" or "failed",
        "timestamp": "2025-10-21T10:30:00.000000+00:00",
        "urls": ["https://example.com"],
        "error": "error message" (only if failed),
        "data": {...} (only if webhook_data_in_payload=True)
    }

    Returns:
        ``({"status": "received"}, 200)`` for a well-formed payload, or a
        400 response when the body is not a JSON object carrying
        ``task_id`` and ``status``.
    """
    # silent=True yields None instead of raising on a missing or invalid
    # JSON body, so malformed requests get an explicit 400 below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'task_id' not in payload or 'status' not in payload:
        return jsonify({"status": "error", "message": "invalid webhook payload"}), 400

    task_id = payload['task_id']
    status = payload['status']

    print(f"\n{'='*60}")
    print(f"📬 Webhook received for task: {task_id}")
    print(f" Status: {status}")
    print(f" Timestamp: {payload.get('timestamp')}")
    print(f" URLs: {payload.get('urls')}")

    if status == 'completed':
        results = []
        if 'data' in payload:
            # If data is in payload, process it directly
            print(" ✅ Data included in webhook")
            results = (payload['data'] or {}).get('results', [])
        else:
            # Fetch results from API if not included
            print(" 📥 Fetching results from API...")
            try:
                # Bounded wait so a stalled API cannot hang this handler.
                result_response = requests.get(
                    f"{CRAWL4AI_BASE_URL}/crawl/job/{task_id}", timeout=30
                )
            except requests.exceptions.RequestException as exc:
                print(f" ❌ Failed to fetch results: {exc}")
            else:
                if result_response.ok:
                    data = result_response.json()
                    print(" ✅ Results fetched successfully")
                    # The job endpoint nests crawl results under 'result'.
                    results = (data.get('result') or {}).get('results', [])
                else:
                    print(f" ❌ Failed to fetch results: HTTP {result_response.status_code}")

        # Process the crawl results here
        for result in results:
            print(f" - Crawled: {result.get('url')}")
            print(f" - Markdown length: {len(result.get('markdown') or '')}")

    elif status == 'failed':
        print(f" ❌ Job failed: {payload.get('error', 'Unknown error')}")

    print(f"{'='*60}\n")

    # Store webhook for demonstration
    received_webhooks.append(payload)

    # Return 200 OK to acknowledge receipt
    return jsonify({"status": "received"}), 200
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/webhooks/llm-complete', methods=['POST'])
def handle_llm_webhook():
    """
    Webhook handler that receives notifications when LLM extraction jobs complete.

    Payload structure:
    {
        "task_id": "llm_1698765432_12345",
        "task_type": "llm_extraction",
        "status": "completed" or "failed",
        "timestamp": "2025-10-21T10:30:00.000000+00:00",
        "urls": ["https://example.com/article"],
        "error": "error message" (only if failed),
        "data": {"extracted_content": {...}} (only if webhook_data_in_payload=True)
    }

    Returns:
        ``({"status": "received"}, 200)`` for a well-formed payload, or a
        400 response when the body is not a JSON object carrying
        ``task_id`` and ``status``.
    """
    # silent=True yields None instead of raising on a missing or invalid
    # JSON body, so malformed requests get an explicit 400 below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'task_id' not in payload or 'status' not in payload:
        return jsonify({"status": "error", "message": "invalid webhook payload"}), 400

    task_id = payload['task_id']
    status = payload['status']
    urls = payload.get('urls') or []
    first_url = urls[0] if urls else None  # avoid IndexError on an empty list

    print(f"\n{'='*60}")
    print(f"🤖 LLM Webhook received for task: {task_id}")
    print(f" Task Type: {payload.get('task_type')}")
    print(f" Status: {status}")
    print(f" Timestamp: {payload.get('timestamp')}")
    print(f" URL: {first_url}")

    if status == 'completed':
        if 'data' in payload:
            # If data is in payload, process it directly
            print(" ✅ Data included in webhook")
            # Webhook wraps extracted content in 'extracted_content' field
            extracted = (payload['data'] or {}).get('extracted_content', {})
            print(" - Extracted content:")
            print(f" {json.dumps(extracted, indent=8)}")
        else:
            # Fetch results from API if not included
            print(" 📥 Fetching results from API...")
            try:
                # Bounded wait so a stalled API cannot hang this handler.
                result_response = requests.get(
                    f"{CRAWL4AI_BASE_URL}/llm/job/{task_id}", timeout=30
                )
            except requests.exceptions.RequestException as exc:
                print(f" ❌ Failed to fetch results: {exc}")
            else:
                if result_response.ok:
                    data = result_response.json()
                    print(" ✅ Results fetched successfully")
                    # API returns unwrapped content in 'result' field
                    extracted = data.get('result')
                    print(" - Extracted content:")
                    print(f" {json.dumps(extracted, indent=8)}")
                else:
                    print(f" ❌ Failed to fetch results: HTTP {result_response.status_code}")

    elif status == 'failed':
        print(f" ❌ Job failed: {payload.get('error', 'Unknown error')}")

    print(f"{'='*60}\n")

    # Store webhook for demonstration
    received_webhooks.append(payload)

    # Return 200 OK to acknowledge receipt
    return jsonify({"status": "received"}), 200
|
||||||
|
|
||||||
|
|
||||||
|
def start_webhook_server():
    """Run the Flask webhook server on port 8080, listening on all interfaces.

    This call blocks while the server runs; it is meant to be used as the
    target of a background thread. The reloader is disabled explicitly.
    """
    app.run(host='0.0.0.0', port=8080, debug=False, use_reloader=False)
|
||||||
|
|
||||||
|
|
||||||
|
def submit_crawl_job_with_webhook(urls, webhook_url, include_data=False):
    """
    Submit a crawl job with webhook notification.

    Args:
        urls: List of URLs to crawl
        webhook_url: URL to receive webhook notifications
        include_data: Whether to include full results in webhook payload

    Returns:
        task_id: The job's task identifier, or None if the request could not
        be sent or the server rejected it.
    """
    payload = {
        "urls": urls,
        "browser_config": {"headless": True},
        "crawler_config": {"cache_mode": "bypass"},
        "webhook_config": {
            "webhook_url": webhook_url,
            "webhook_data_in_payload": include_data,
            # Optional: Add custom headers for authentication
            # "webhook_headers": {
            #     "X-Webhook-Secret": "your-secret-token"
            # }
        }
    }

    print("\n🚀 Submitting crawl job...")
    print(f" URLs: {urls}")
    print(f" Webhook: {webhook_url}")
    print(f" Include data: {include_data}")

    try:
        response = requests.post(
            f"{CRAWL4AI_BASE_URL}/crawl/job",
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=30,  # never wait indefinitely for the server
        )
    except requests.exceptions.RequestException as exc:
        # Connection problems are reported the same way as a rejected job.
        print(f" ❌ Failed to submit job: {exc}")
        return None

    if not response.ok:
        print(f" ❌ Failed to submit job: {response.text}")
        return None

    task_id = response.json()['task_id']
    print(" ✅ Job submitted successfully")
    print(f" Task ID: {task_id}")
    return task_id
|
||||||
|
|
||||||
|
|
||||||
|
def submit_llm_job_with_webhook(url, query, webhook_url, include_data=False, schema=None, provider=None):
    """
    Submit an LLM extraction job with webhook notification.

    Args:
        url: URL to extract content from
        query: Instruction for the LLM (e.g., "Extract article title and author")
        webhook_url: URL to receive webhook notifications
        include_data: Whether to include full results in webhook payload
        schema: Optional JSON schema for structured extraction
        provider: Optional LLM provider (e.g., "openai/gpt-4o-mini")

    Returns:
        task_id: The job's task identifier, or None if the request could not
        be sent or the server rejected it.
    """
    payload = {
        "url": url,
        "q": query,
        "cache": False,
        "webhook_config": {
            "webhook_url": webhook_url,
            "webhook_data_in_payload": include_data,
            # Optional: Add custom headers for authentication
            # "webhook_headers": {
            #     "X-Webhook-Secret": "your-secret-token"
            # }
        }
    }

    # Optional fields are only sent when the caller supplied a truthy value.
    if schema:
        payload["schema"] = schema

    if provider:
        payload["provider"] = provider

    print("\n🤖 Submitting LLM extraction job...")
    print(f" URL: {url}")
    print(f" Query: {query}")
    print(f" Webhook: {webhook_url}")
    print(f" Include data: {include_data}")
    if provider:
        print(f" Provider: {provider}")

    try:
        response = requests.post(
            f"{CRAWL4AI_BASE_URL}/llm/job",
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=30,  # never wait indefinitely for the server
        )
    except requests.exceptions.RequestException as exc:
        # Connection problems are reported the same way as a rejected job.
        print(f" ❌ Failed to submit job: {exc}")
        return None

    if not response.ok:
        print(f" ❌ Failed to submit job: {response.text}")
        return None

    task_id = response.json()['task_id']
    print(" ✅ Job submitted successfully")
    print(f" Task ID: {task_id}")
    return task_id
|
||||||
|
|
||||||
|
|
||||||
|
def submit_job_without_webhook(urls):
    """
    Submit a job without webhook (traditional polling approach).

    Args:
        urls: List of URLs to crawl

    Returns:
        task_id: The job's task identifier, or None if the request could not
        be sent or the server rejected it.
    """
    payload = {
        "urls": urls,
        "browser_config": {"headless": True},
        "crawler_config": {"cache_mode": "bypass"}
    }

    print("\n🚀 Submitting crawl job (without webhook)...")
    print(f" URLs: {urls}")

    try:
        response = requests.post(
            f"{CRAWL4AI_BASE_URL}/crawl/job",
            json=payload,
            timeout=30,  # never wait indefinitely for the server
        )
    except requests.exceptions.RequestException as exc:
        # Connection problems are reported the same way as a rejected job.
        print(f" ❌ Failed to submit job: {exc}")
        return None

    if not response.ok:
        print(f" ❌ Failed to submit job: {response.text}")
        return None

    task_id = response.json()['task_id']
    print(" ✅ Job submitted successfully")
    print(f" Task ID: {task_id}")
    return task_id
|
||||||
|
|
||||||
|
|
||||||
|
def poll_job_status(task_id, timeout=60):
    """
    Poll for job status (used when webhook is not configured).

    Requests the job's status every 2 seconds until it reports
    ``completed`` or ``failed``, a status request fails, or ``timeout``
    seconds have passed.

    Args:
        task_id: The job's task identifier
        timeout: Maximum time to wait in seconds

    Returns:
        The decoded status response once the job is ``completed`` or
        ``failed``; None if a status request fails or the timeout is reached.
    """
    print("\n⏳ Polling for job status...")
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        try:
            # Per-request timeout so one stalled call cannot outlive the deadline by far.
            response = requests.get(f"{CRAWL4AI_BASE_URL}/crawl/job/{task_id}", timeout=10)
        except requests.exceptions.RequestException as exc:
            print(f" ❌ Failed to get status: {exc}")
            return None

        if not response.ok:
            print(f" ❌ Failed to get status: {response.text}")
            return None

        data = response.json()
        status = data.get('status', 'unknown')

        if status == 'completed':
            print(" ✅ Job completed!")
            return data
        if status == 'failed':
            print(f" ❌ Job failed: {data.get('error', 'Unknown error')}")
            return data

        print(f" ⏳ Status: {status}, waiting...")
        time.sleep(2)

    print(" ⏰ Timeout reached")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the webhook demonstration.

    Checks that the Crawl4AI server answers its health endpoint, starts the
    local webhook receiver in a daemon thread, submits four webhook-enabled
    jobs and one polled job, then waits and prints a summary of the entries
    collected in ``received_webhooks``.
    """

    def banner(title):
        # Section header printed before each example and the summary.
        print(f"\n{'='*60}")
        print(title)
        print(f"{'='*60}")

    # Check if Crawl4AI is running
    try:
        health = requests.get(f"{CRAWL4AI_BASE_URL}/health", timeout=5)
        print(f"✅ Crawl4AI is running: {health.json()}")
    except (requests.RequestException, ValueError):
        # Only connection/HTTP problems and an undecodable body mean "not
        # running"; a bare except would also swallow Ctrl-C and SystemExit.
        print(f"❌ Cannot connect to Crawl4AI at {CRAWL4AI_BASE_URL}")
        print(" Please make sure Docker container is running:")
        print(" docker run -d -p 11235:11235 --name crawl4ai unclecode/crawl4ai:latest")
        return

    # Start webhook server in background thread
    print(f"\n🌐 Starting webhook server at {WEBHOOK_BASE_URL}...")
    webhook_thread = Thread(target=start_webhook_server, daemon=True)
    webhook_thread.start()
    time.sleep(2)  # Give server time to start

    # Example 1: Job with webhook (notification only, fetch data separately)
    banner("Example 1: Webhook Notification Only")
    submit_crawl_job_with_webhook(
        urls=["https://example.com"],
        webhook_url=f"{WEBHOOK_BASE_URL}/webhooks/crawl-complete",
        include_data=False
    )

    # Example 2: Job with webhook (data included in payload)
    time.sleep(5)  # Wait a bit between requests
    banner("Example 2: Webhook with Full Data")
    submit_crawl_job_with_webhook(
        urls=["https://www.python.org"],
        webhook_url=f"{WEBHOOK_BASE_URL}/webhooks/crawl-complete",
        include_data=True
    )

    # Example 3: LLM extraction with webhook (notification only)
    time.sleep(5)  # Wait a bit between requests
    banner("Example 3: LLM Extraction with Webhook (Notification Only)")
    submit_llm_job_with_webhook(
        url="https://www.example.com",
        query="Extract the main heading and description from this page.",
        webhook_url=f"{WEBHOOK_BASE_URL}/webhooks/llm-complete",
        include_data=False,
        provider="openai/gpt-4o-mini"
    )

    # Example 4: LLM extraction with webhook (data included + schema)
    time.sleep(5)  # Wait a bit between requests
    banner("Example 4: LLM Extraction with Schema and Full Data")

    # Define a schema for structured extraction
    schema = json.dumps({
        "type": "object",
        "properties": {
            "title": {"type": "string", "description": "Page title"},
            "description": {"type": "string", "description": "Page description"}
        },
        "required": ["title"]
    })

    submit_llm_job_with_webhook(
        url="https://www.python.org",
        query="Extract the title and description of this website",
        webhook_url=f"{WEBHOOK_BASE_URL}/webhooks/llm-complete",
        include_data=True,
        schema=schema,
        provider="openai/gpt-4o-mini"
    )

    # Example 5: Traditional polling (no webhook)
    time.sleep(5)  # Wait a bit between requests
    banner("Example 5: Traditional Polling (No Webhook)")
    task_id_5 = submit_job_without_webhook(
        urls=["https://github.com"]
    )
    if task_id_5:
        result = poll_job_status(task_id_5)
        if result and result.get('status') == 'completed':
            print(" ✅ Results retrieved via polling")

    # Wait for webhooks to arrive
    print("\n⏳ Waiting for webhooks to be received...")
    time.sleep(30)  # Give jobs time to complete and webhooks to arrive (longer for LLM)

    # Summary
    banner("Summary")
    print(f"Total webhooks received: {len(received_webhooks)}")

    # .get() keeps the summary from crashing on a payload that lacks a key.
    crawl_webhooks = [w for w in received_webhooks if w.get('task_type') == 'crawl']
    llm_webhooks = [w for w in received_webhooks if w.get('task_type') == 'llm_extraction']

    print("\n📊 Breakdown:")
    print(f" - Crawl webhooks: {len(crawl_webhooks)}")
    print(f" - LLM extraction webhooks: {len(llm_webhooks)}")

    print("\n📋 Details:")
    for i, webhook in enumerate(received_webhooks, 1):
        task_type = webhook.get('task_type', 'unknown')
        icon = "🕷️" if task_type == "crawl" else "🤖"
        task_id = webhook.get('task_id', 'unknown')
        status = webhook.get('status', 'unknown')
        print(f"{i}. {icon} Task {task_id}: {status} ({task_type})")

    print("\n✅ Demo completed!")
    print("\n💡 Pro tips:")
    print(" - In production, your webhook URL should be publicly accessible")
    print(" (e.g., https://myapp.com/webhooks) or use ngrok for testing")
    print(" - Both /crawl/job and /llm/job support the same webhook configuration")
    print(" - Use webhook_data_in_payload=true to get results directly in the webhook")
    print(" - LLM jobs may take longer, adjust timeouts accordingly")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
BIN
docs/md_v2/assets/crawl4ai-skill.zip
Normal file
BIN
docs/md_v2/assets/crawl4ai-skill.zip
Normal file
Binary file not shown.
BIN
docs/md_v2/assets/images/logo.png
Normal file
BIN
docs/md_v2/assets/images/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.6 KiB |
376
docs/md_v2/assets/page_actions.css
Normal file
376
docs/md_v2/assets/page_actions.css
Normal file
@@ -0,0 +1,376 @@
|
|||||||
|
/* ==== File: assets/page_actions.css ==== */
|
||||||
|
/* Page Actions Dropdown - Terminal Style */
|
||||||
|
|
||||||
|
/* Wrapper - positioned in content area */
|
||||||
|
.page-actions-wrapper {
|
||||||
|
position: absolute;
|
||||||
|
top: 1.3rem;
|
||||||
|
right: 1rem;
|
||||||
|
z-index: 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Floating Action Button */
|
||||||
|
.page-actions-button {
|
||||||
|
position: relative;
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
background: #3f3f44;
|
||||||
|
border: 1px solid #50ffff;
|
||||||
|
color: #e8e9ed;
|
||||||
|
padding: 0.75rem 1rem;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s ease;
|
||||||
|
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-button:hover {
|
||||||
|
background: #50ffff;
|
||||||
|
color: #070708;
|
||||||
|
transform: translateY(-2px);
|
||||||
|
box-shadow: 0 6px 16px rgba(80, 255, 255, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-button::before {
|
||||||
|
content: '▤';
|
||||||
|
font-size: 1.2rem;
|
||||||
|
line-height: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-button::after {
|
||||||
|
content: '▼';
|
||||||
|
font-size: 0.6rem;
|
||||||
|
transition: transform 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-button.active::after {
|
||||||
|
transform: rotate(180deg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dropdown Menu */
|
||||||
|
.page-actions-dropdown {
|
||||||
|
position: absolute;
|
||||||
|
top: 3.5rem;
|
||||||
|
right: 0;
|
||||||
|
z-index: 1001;
|
||||||
|
background: #1a1a1a;
|
||||||
|
border: 1px solid #3f3f44;
|
||||||
|
border-radius: 8px;
|
||||||
|
min-width: 280px;
|
||||||
|
opacity: 0;
|
||||||
|
visibility: hidden;
|
||||||
|
transform: translateY(-10px);
|
||||||
|
transition: all 0.2s ease;
|
||||||
|
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5);
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-dropdown.active {
|
||||||
|
opacity: 1;
|
||||||
|
visibility: visible;
|
||||||
|
transform: translateY(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-dropdown::before {
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
top: -8px;
|
||||||
|
right: 1.5rem;
|
||||||
|
width: 0;
|
||||||
|
height: 0;
|
||||||
|
border-left: 8px solid transparent;
|
||||||
|
border-right: 8px solid transparent;
|
||||||
|
border-bottom: 8px solid #3f3f44;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Menu Header */
|
||||||
|
.page-actions-header {
|
||||||
|
background: #3f3f44;
|
||||||
|
padding: 0.5rem 0.75rem;
|
||||||
|
border-bottom: 1px solid #50ffff;
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: #a3abba;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.05em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-header::before {
|
||||||
|
content: '┌─';
|
||||||
|
margin-right: 0.5rem;
|
||||||
|
color: #50ffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Menu Items */
|
||||||
|
.page-actions-menu {
|
||||||
|
list-style: none;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0.25rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-item {
|
||||||
|
display: block;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul>li.page-action-item::after{
|
||||||
|
content: '';
|
||||||
|
}
|
||||||
|
.page-action-link {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
padding: 0.5rem 0.75rem;
|
||||||
|
color: #e8e9ed;
|
||||||
|
text-decoration: none !important;
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
transition: all 0.15s ease;
|
||||||
|
cursor: pointer;
|
||||||
|
border-left: 3px solid transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link:hover:not(.disabled) {
|
||||||
|
background: #3f3f44;
|
||||||
|
border-left-color: #50ffff;
|
||||||
|
color: #50ffff;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link.disabled {
|
||||||
|
opacity: 0.5;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link.disabled:hover {
|
||||||
|
background: transparent;
|
||||||
|
color: #e8e9ed;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Icons using ASCII/Terminal characters */
|
||||||
|
.page-action-icon {
|
||||||
|
font-size: 1rem;
|
||||||
|
width: 1.5rem;
|
||||||
|
text-align: center;
|
||||||
|
font-weight: bold;
|
||||||
|
color: #50ffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link:hover:not(.disabled) .page-action-icon {
|
||||||
|
color: #50ffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link.disabled .page-action-icon {
|
||||||
|
color: #666;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Specific icons */
|
||||||
|
.icon-copy::before {
|
||||||
|
content: '⎘'; /* Copy/duplicate symbol */
|
||||||
|
}
|
||||||
|
|
||||||
|
.icon-view::before {
|
||||||
|
content: '⎙'; /* Document symbol */
|
||||||
|
}
|
||||||
|
|
||||||
|
.icon-ai::before {
|
||||||
|
content: '⚡'; /* Lightning/AI symbol */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Action Text */
|
||||||
|
.page-action-text {
|
||||||
|
flex: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-label {
|
||||||
|
display: block;
|
||||||
|
font-weight: 600;
|
||||||
|
margin-bottom: 0.05rem;
|
||||||
|
line-height: 1.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-description {
|
||||||
|
display: block;
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: #a3abba;
|
||||||
|
line-height: 1.2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Badge */
|
||||||
|
/* External link indicator */
|
||||||
|
.page-action-external::after {
|
||||||
|
content: '→';
|
||||||
|
margin-left: 0.25rem;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Divider */
|
||||||
|
.page-actions-divider {
|
||||||
|
height: 1px;
|
||||||
|
background: #3f3f44;
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Success/Copy feedback */
|
||||||
|
.page-action-copied {
|
||||||
|
background: #50ff50 !important;
|
||||||
|
color: #070708 !important;
|
||||||
|
border-left-color: #50ff50 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-copied .page-action-icon {
|
||||||
|
color: #070708 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-copied .page-action-icon::before {
|
||||||
|
content: '✓';
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mobile Responsive */
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.page-actions-wrapper {
|
||||||
|
top: 0.5rem;
|
||||||
|
right: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-button {
|
||||||
|
padding: 0.6rem 0.8rem;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-dropdown {
|
||||||
|
min-width: 260px;
|
||||||
|
max-width: calc(100vw - 2rem);
|
||||||
|
right: -0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link {
|
||||||
|
padding: 0.6rem 0.8rem;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-description {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Animation for tooltip/notification */
|
||||||
|
@keyframes slideInFromTop {
|
||||||
|
from {
|
||||||
|
transform: translateY(-20px);
|
||||||
|
opacity: 0;
|
||||||
|
}
|
||||||
|
to {
|
||||||
|
transform: translateY(0);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Transient toast shown after a page action (e.g. "copied"). */
.page-actions-notification {
    position: fixed;
    /* Fallback keeps `top` valid when --header-height is not defined;
       without it the whole declaration is dropped at computed-value time. */
    top: calc(var(--header-height, 0px) + 0.5rem);
    /* right: 50% + translateX(50%) centres the toast horizontally. */
    right: 50%;
    transform: translateX(50%);
    z-index: 1100;
    background: #50ff50;
    color: #070708;
    padding: 0.75rem 1.5rem;
    border-radius: 6px;
    font-family: 'Dank Mono', Monaco, monospace;
    font-size: 0.875rem;
    font-weight: 600;
    box-shadow: 0 4px 12px rgba(80, 255, 80, 0.4);
    animation: slideInFromTop 0.3s ease;
    /* The toast is informational only and must not block clicks. */
    pointer-events: none;
}
|
||||||
|
|
||||||
|
.page-actions-notification::before {
|
||||||
|
content: '✓ ';
|
||||||
|
margin-right: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hide on print */
|
||||||
|
@media print {
|
||||||
|
.page-actions-button,
|
||||||
|
.page-actions-dropdown {
|
||||||
|
display: none !important;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Overlay for mobile */
.page-actions-overlay {
    display: none;
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background: rgba(0, 0, 0, 0.5);
    z-index: 998;
    opacity: 0;
    /* While inactive the overlay is fully transparent; it must not swallow
       taps. On small screens it is rendered (display: block, below) so the
       opacity transition can run, which would otherwise leave an invisible
       full-screen layer intercepting every click on the page. */
    pointer-events: none;
    transition: opacity 0.2s ease;
}

/* Visible backdrop: becomes interactive again so a tap on it can be handled. */
.page-actions-overlay.active {
    display: block;
    opacity: 1;
    pointer-events: auto;
}

/* Keep the element rendered on small screens so the fade can animate. */
@media (max-width: 768px) {
    .page-actions-overlay {
        display: block;
    }
}
|
||||||
|
|
||||||
|
/* Keyboard focus styles */
|
||||||
|
.page-action-link:focus {
|
||||||
|
outline: 2px solid #50ffff;
|
||||||
|
outline-offset: -2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-button:focus {
|
||||||
|
outline: 2px solid #50ffff;
|
||||||
|
outline-offset: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Loading state */
|
||||||
|
.page-action-link.loading {
|
||||||
|
pointer-events: none;
|
||||||
|
opacity: 0.7;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-action-link.loading .page-action-icon::before {
|
||||||
|
content: '⟳';
|
||||||
|
animation: spin 1s linear infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes spin {
|
||||||
|
from { transform: rotate(0deg); }
|
||||||
|
to { transform: rotate(360deg); }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Terminal-style border effect on hover */
|
||||||
|
.page-actions-dropdown:hover {
|
||||||
|
border-color: #50ffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Footer info */
|
||||||
|
.page-actions-footer {
|
||||||
|
background: #070708;
|
||||||
|
padding: 0.4rem 0.75rem;
|
||||||
|
border-top: 1px solid #3f3f44;
|
||||||
|
font-size: 0.65rem;
|
||||||
|
color: #666;
|
||||||
|
text-align: center;
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-actions-footer::before {
|
||||||
|
content: '└─';
|
||||||
|
margin-right: 0.5rem;
|
||||||
|
color: #3f3f44;
|
||||||
|
}
|
||||||
427
docs/md_v2/assets/page_actions.js
Normal file
427
docs/md_v2/assets/page_actions.js
Normal file
@@ -0,0 +1,427 @@
|
|||||||
|
// ==== File: assets/page_actions.js ====
|
||||||
|
// Page Actions - Copy/View Markdown functionality
|
||||||
|
|
||||||
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
|
// Configuration
|
||||||
|
const config = {
|
||||||
|
githubRepo: 'unclecode/crawl4ai',
|
||||||
|
githubBranch: 'main',
|
||||||
|
docsPath: 'docs/md_v2',
|
||||||
|
excludePaths: ['/apps/c4a-script/', '/apps/llmtxt/', '/apps/crawl4ai-assistant/', '/core/ask-ai/'], // Don't show on app pages
|
||||||
|
};
|
||||||
|
|
||||||
|
let cachedMarkdown = null;
|
||||||
|
let cachedMarkdownPath = null;
|
||||||
|
|
||||||
|
// Check if we should show the button on this page
|
||||||
|
/**
 * Decide whether the page-actions widget belongs on the current page.
 *
 * Returns false for the homepage, for pages whose title contains "404",
 * for pages without the #terminal-mkdocs-main-content container, and for
 * any path containing one of `config.excludePaths`; true otherwise.
 */
function shouldShowButton() {
    const { pathname } = window.location;

    // Homepage
    const isHomepage = pathname === '/' || pathname === '/index.html';
    if (isHomepage) return false;

    // 404 pages
    const title = document.title;
    if (title && title.toLowerCase().includes('404')) return false;

    // The mkdocs main content container is required
    if (!document.getElementById('terminal-mkdocs-main-content')) return false;

    // Excluded paths (apps); everything else is a documentation page
    const isExcluded = config.excludePaths.some((excluded) => pathname.includes(excluded));
    return !isExcluded;
}
|
||||||
|
|
||||||
|
if (!shouldShowButton()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get current page markdown path
|
||||||
|
/**
 * Map the current URL path to a markdown file path.
 *
 * Strips one leading and one trailing slash and a trailing ".html", then
 * appends ".md". An empty result or "index" maps to "index.md".
 */
function getCurrentMarkdownPath() {
    const stem = window.location.pathname
        .replace(/^\/|\/$/g, '')
        .replace(/\.html$/, '');

    const isRoot = !stem || stem === 'index';
    return isRoot ? 'index.md' : `${stem}.md`;
}
|
||||||
|
|
||||||
|
/**
 * Fetch the markdown for the current page from the raw GitHub URL and
 * store it (with its path) in the module-level cache variables.
 *
 * Resolves to the markdown text. Throws when the path is empty or when
 * the response status is not OK.
 */
async function loadMarkdownContent() {
    const pagePath = getCurrentMarkdownPath();
    if (!pagePath) throw new Error('Invalid markdown path');

    const res = await fetch(getGithubRawUrl());
    if (!res.ok) throw new Error(`Failed to fetch markdown: ${res.status}`);

    const text = await res.text();
    cachedMarkdown = text;
    cachedMarkdownPath = pagePath;
    return text;
}
|
||||||
|
|
||||||
|
/**
 * Make sure the markdown for the current page is in the cache.
 *
 * Resolves to true when the cache already holds this page or loading
 * succeeds; resolves to false (and clears the cache) when loading fails
 * or the markdown path is empty. Never rejects for a load failure.
 */
async function ensureMarkdownCached() {
    const pagePath = getCurrentMarkdownPath();
    if (!pagePath) return false;

    const cacheHit = cachedMarkdown && cachedMarkdownPath === pagePath;
    if (cacheHit) return true;

    try {
        await loadMarkdownContent();
    } catch (error) {
        console.warn('Page Actions: Markdown not available for this page.', error);
        cachedMarkdown = null;
        cachedMarkdownPath = null;
        return false;
    }
    return true;
}
|
||||||
|
|
||||||
|
/**
 * Resolve to the cached markdown for the current page, loading it first
 * if needed. Throws when the markdown cannot be made available.
 */
async function getMarkdownContent() {
    if (await ensureMarkdownCached()) {
        return cachedMarkdown;
    }
    throw new Error('Markdown not available for this page.');
}
|
||||||
|
|
||||||
|
// Get GitHub raw URL for current page
|
||||||
|
/**
 * Build the raw.githubusercontent.com URL of the current page's markdown
 * from the configured repo, branch and docs path.
 */
function getGithubRawUrl() {
    const { githubRepo, githubBranch, docsPath } = config;
    const pagePath = getCurrentMarkdownPath();
    return `https://raw.githubusercontent.com/${githubRepo}/${githubBranch}/${docsPath}/${pagePath}`;
}
|
||||||
|
|
||||||
|
// Get GitHub file URL for current page (for viewing)
|
||||||
|
/**
 * Build the github.com "blob" URL of the current page's markdown
 * (the human-viewable file page) from the configured repo, branch and
 * docs path.
 */
function getGithubFileUrl() {
    const { githubRepo, githubBranch, docsPath } = config;
    const pagePath = getCurrentMarkdownPath();
    return `https://github.com/${githubRepo}/blob/${githubBranch}/${docsPath}/${pagePath}`;
}
|
||||||
|
|
||||||
|
// Create the UI
|
||||||
|
/**
 * Build the page-actions widget and attach it to the document.
 *
 * Creates the toggle button, the dropdown menu and a wrapper holding both
 * (appended to #terminal-mkdocs-main-content), plus an overlay element
 * appended to <body>. Returns { button, dropdown, overlay, wrapper }, or
 * null (after a console warning) when the main content container is missing.
 */
function createPageActionsUI() {
    const host = document.getElementById('terminal-mkdocs-main-content');
    if (!host) {
        console.warn('Page Actions: Could not find #terminal-mkdocs-main-content');
        return null;
    }

    // Markup of the dropdown: header, three actions, footer hint.
    const menuMarkup = `
        <div class="page-actions-header">Page Copy</div>
        <ul class="page-actions-menu">
            <li class="page-action-item">
                <a href="#" class="page-action-link" id="action-copy-markdown" role="menuitem">
                    <span class="page-action-icon icon-copy"></span>
                    <span class="page-action-text">
                        <span class="page-action-label">Copy as Markdown</span>
                        <span class="page-action-description">Copy page for LLMs</span>
                    </span>
                </a>
            </li>
            <li class="page-action-item">
                <a href="#" class="page-action-link page-action-external" id="action-view-markdown" target="_blank" role="menuitem">
                    <span class="page-action-icon icon-view"></span>
                    <span class="page-action-text">
                        <span class="page-action-label">View as Markdown</span>
                        <span class="page-action-description">Open raw source</span>
                    </span>
                </a>
            </li>
            <div class="page-actions-divider"></div>
            <li class="page-action-item">
                <a href="#" class="page-action-link page-action-external" id="action-open-chatgpt" role="menuitem">
                    <span class="page-action-icon icon-ai"></span>
                    <span class="page-action-text">
                        <span class="page-action-label">Open in ChatGPT</span>
                        <span class="page-action-description">Ask questions about this page</span>
                    </span>
                </a>
            </li>
        </ul>
        <div class="page-actions-footer">ESC to close</div>
    `;

    // Toggle button
    const button = document.createElement('button');
    button.className = 'page-actions-button';
    button.setAttribute('aria-label', 'Page copy');
    button.setAttribute('aria-expanded', 'false');
    button.innerHTML = '<span>Page Copy</span>';

    // Overlay for mobile
    const overlay = document.createElement('div');
    overlay.className = 'page-actions-overlay';

    // Dropdown menu
    const dropdown = document.createElement('div');
    dropdown.className = 'page-actions-dropdown';
    dropdown.setAttribute('role', 'menu');
    dropdown.innerHTML = menuMarkup;

    // Wrapper keeps the button and its dropdown positioned together.
    const wrapper = document.createElement('div');
    wrapper.className = 'page-actions-wrapper';
    wrapper.append(button, dropdown);

    host.appendChild(wrapper);
    document.body.appendChild(overlay);

    return { button, dropdown, overlay, wrapper };
}
|
||||||
|
|
||||||
|
// Toggle dropdown
|
||||||
|
/**
 * Close the dropdown when it is currently open (has the "active" class),
 * otherwise open it.
 */
function toggleDropdown(button, dropdown, overlay) {
    const action = dropdown.classList.contains('active') ? closeDropdown : openDropdown;
    action(button, dropdown, overlay);
}
|
||||||
|
|
||||||
|
/**
 * Mark the dropdown and button as active and reflect the open state in
 * aria-expanded. The overlay argument is accepted but intentionally not
 * activated.
 */
function openDropdown(button, dropdown, overlay) {
    for (const el of [dropdown, button]) {
        el.classList.add('active');
    }
    button.setAttribute('aria-expanded', 'true');
}
|
||||||
|
|
||||||
|
/**
 * Remove the active state from the dropdown and button and reflect the
 * closed state in aria-expanded. The overlay argument is accepted but
 * intentionally left untouched.
 */
function closeDropdown(button, dropdown, overlay) {
    for (const el of [dropdown, button]) {
        el.classList.remove('active');
    }
    button.setAttribute('aria-expanded', 'false');
}
|
||||||
|
|
||||||
|
// Show notification
|
||||||
|
/**
 * Show a transient notification with the given text and remove it from
 * the document after `duration` milliseconds (default 2000).
 */
function showNotification(message, duration = 2000) {
    const toast = Object.assign(document.createElement('div'), {
        className: 'page-actions-notification',
        textContent: message,
    });
    document.body.appendChild(toast);

    setTimeout(() => toast.remove(), duration);
}
|
||||||
|
|
||||||
|
// Copy markdown to clipboard
|
||||||
|
/**
 * Copy the current page's markdown to the clipboard.
 *
 * Puts `link` into the "loading" state while working. On success the link
 * gets the "page-action-copied" class for two seconds and a confirmation
 * notification is shown; on any failure the error is logged and an error
 * notification is shown instead.
 */
async function copyMarkdownToClipboard(link) {
    const COPIED_CLASS = 'page-action-copied';
    const state = link.classList;

    state.add('loading');

    try {
        const text = await getMarkdownContent();
        await navigator.clipboard.writeText(text);

        // Visual feedback
        state.remove('loading');
        state.add(COPIED_CLASS);
        showNotification('Markdown copied to clipboard!');

        // Reset after delay
        setTimeout(() => state.remove(COPIED_CLASS), 2000);
    } catch (error) {
        console.error('Error copying markdown:', error);
        state.remove('loading');
        showNotification('Error: Could not copy markdown');
    }
}
|
||||||
|
|
||||||
|
// View markdown in new tab
|
||||||
|
/**
 * Open the current page's markdown file on GitHub in a new tab, without
 * giving the new tab an opener or referrer.
 */
function viewMarkdown() {
    window.open(getGithubFileUrl(), '_blank', 'noopener,noreferrer');
}
|
||||||
|
|
||||||
|
/**
 * Return the current page URL with everything from the first "#" removed.
 */
function getCurrentPageUrl() {
    const [withoutFragment] = window.location.href.split('#');
    return withoutFragment;
}
|
||||||
|
|
||||||
|
/**
 * Open ChatGPT in a new tab with a URL-encoded prompt asking it to read
 * the current page (URL without fragment).
 */
function openChatGPT() {
    const question = `Read ${getCurrentPageUrl()} so I can ask questions about it.`;
    const target = `https://chatgpt.com/?hint=search&prompt=${encodeURIComponent(question)}`;
    window.open(target, '_blank', 'noopener,noreferrer');
}
|
||||||
|
|
||||||
|
// Bootstraps the widget: bails out when the page should not show it or its
// markdown cannot be loaded, otherwise builds the UI and wires up all mouse
// and keyboard interaction.
(async () => {
    if (!shouldShowButton()) {
        return;
    }

    // Only offer the actions when the page's markdown can actually be fetched.
    const markdownAvailable = await ensureMarkdownCached();
    if (!markdownAvailable) {
        return;
    }

    const ui = createPageActionsUI();
    if (!ui) {
        return;
    }

    const { button, dropdown, overlay } = ui;
    const close = () => closeDropdown(button, dropdown, overlay);
    const toggle = () => toggleDropdown(button, dropdown, overlay);
    const isOpen = () => dropdown.classList.contains('active');
    const enabledLinks = () =>
        Array.from(dropdown.querySelectorAll('.page-action-link:not(.disabled)'));

    // Event listeners
    button.addEventListener('click', (e) => {
        e.stopPropagation();
        toggle();
    });

    overlay.addEventListener('click', close);

    // Copy markdown action
    document.getElementById('action-copy-markdown').addEventListener('click', async (e) => {
        e.preventDefault();
        e.stopPropagation();
        await copyMarkdownToClipboard(e.currentTarget);
    });

    // View markdown action
    document.getElementById('action-view-markdown').addEventListener('click', (e) => {
        e.preventDefault();
        e.stopPropagation();
        viewMarkdown();
        close();
    });

    // Open in ChatGPT action
    document.getElementById('action-open-chatgpt').addEventListener('click', (e) => {
        e.preventDefault();
        e.stopPropagation();
        openChatGPT();
        close();
    });

    // Close on ESC key
    document.addEventListener('keydown', (e) => {
        if (e.key === 'Escape' && isOpen()) {
            // Closing hides the menu; if focus was inside it, hand focus back
            // to the toggle button so keyboard users do not lose their place.
            const focusWasInMenu = dropdown.contains(document.activeElement);
            close();
            if (focusWasInMenu) {
                button.focus();
            }
        }
    });

    // Close when clicking outside
    document.addEventListener('click', (e) => {
        if (!dropdown.contains(e.target) && !button.contains(e.target)) {
            close();
        }
    });

    // Prevent dropdown from closing when clicking inside
    dropdown.addEventListener('click', (e) => {
        // Only stop propagation if not clicking on a link
        if (!e.target.closest('.page-action-link')) {
            e.stopPropagation();
        }
    });

    // Close dropdown on link click (except for copy which handles itself)
    dropdown.querySelectorAll('.page-action-link:not(#action-copy-markdown)').forEach((link) => {
        link.addEventListener('click', () => {
            if (!link.classList.contains('disabled')) {
                setTimeout(close, 100);
            }
        });
    });

    // Handle window resize (debounced)
    let resizeTimer;
    window.addEventListener('resize', () => {
        clearTimeout(resizeTimer);
        resizeTimer = setTimeout(() => {
            // Close dropdown on resize to prevent positioning issues
            if (isOpen()) {
                close();
            }
        }, 250);
    });

    // Accessibility: Focus management
    button.addEventListener('keydown', (e) => {
        if (e.key === 'Enter' || e.key === ' ') {
            e.preventDefault();
            toggle();

            // Focus first menu item when opening
            if (isOpen()) {
                const [firstLink] = enabledLinks();
                if (firstLink) {
                    setTimeout(() => firstLink.focus(), 100);
                }
            }
        }
    });

    // Arrow key navigation within menu
    dropdown.addEventListener('keydown', (e) => {
        if (!isOpen()) return;

        const links = enabledLinks();
        // With no enabled links the index arithmetic below yields NaN and
        // `links[NaN].focus()` would throw; there is nothing to navigate.
        if (links.length === 0) return;

        const currentIndex = links.indexOf(document.activeElement);
        let targetIndex;

        switch (e.key) {
            case 'ArrowDown':
                targetIndex = (currentIndex + 1) % links.length;
                break;
            case 'ArrowUp':
                targetIndex = (currentIndex - 1 + links.length) % links.length;
                break;
            case 'Home':
                targetIndex = 0;
                break;
            case 'End':
                targetIndex = links.length - 1;
                break;
            default:
                return;
        }

        e.preventDefault();
        links[targetIndex].focus();
    });

    console.log('Page Actions initialized for:', getCurrentMarkdownPath());
})();
|
||||||
|
});
|
||||||
@@ -20,17 +20,43 @@ Ever wondered why your AI coding assistant struggles with your library despite c
|
|||||||
|
|
||||||
## Latest Release
|
## Latest Release
|
||||||
|
|
||||||
|
### [Crawl4AI v0.7.6 – The Webhook Infrastructure Update](../blog/release-v0.7.6.md)
|
||||||
|
*October 22, 2025*
|
||||||
|
|
||||||
|
Crawl4AI v0.7.6 introduces comprehensive webhook support for the Docker job queue API, bringing real-time notifications to both crawling and LLM extraction workflows. No more polling!
|
||||||
|
|
||||||
|
Key highlights:
|
||||||
|
- **🪝 Complete Webhook Support**: Real-time notifications for both `/crawl/job` and `/llm/job` endpoints
|
||||||
|
- **🔄 Reliable Delivery**: Exponential backoff retry mechanism (5 attempts: 1s → 2s → 4s → 8s → 16s)
|
||||||
|
- **🔐 Custom Authentication**: Add custom headers for webhook authentication
|
||||||
|
- **📊 Flexible Delivery**: Choose notification-only or include full data in payload
|
||||||
|
- **⚙️ Global Configuration**: Set default webhook URL in config.yml for all jobs
|
||||||
|
- **🎯 Zero Breaking Changes**: Fully backward compatible, webhooks are opt-in
|
||||||
|
|
||||||
|
[Read full release notes →](../blog/release-v0.7.6.md)
|
||||||
|
|
||||||
|
## Recent Releases
|
||||||
|
|
||||||
|
### [Crawl4AI v0.7.5 – The Docker Hooks & Security Update](../blog/release-v0.7.5.md)
|
||||||
|
*September 29, 2025*
|
||||||
|
|
||||||
|
Crawl4AI v0.7.5 introduces the powerful Docker Hooks System for complete pipeline customization, enhanced LLM integration with custom providers, HTTPS preservation for modern web security, and resolves multiple community-reported issues.
|
||||||
|
|
||||||
|
Key highlights:
|
||||||
|
- **🔧 Docker Hooks System**: Custom Python functions at 8 key pipeline points for unprecedented customization
|
||||||
|
- **🤖 Enhanced LLM Integration**: Custom providers with temperature control and base_url configuration
|
||||||
|
- **🔒 HTTPS Preservation**: Secure internal link handling for modern web applications
|
||||||
|
- **🐍 Python 3.10+ Support**: Modern language features and enhanced performance
|
||||||
|
- **🛠️ Bug Fixes**: Resolved multiple community-reported issues including URL processing, JWT authentication, and proxy configuration
|
||||||
|
|
||||||
|
[Read full release notes →](../blog/release-v0.7.5.md)
|
||||||
|
|
||||||
|
## Recent Releases
|
||||||
|
|
||||||
### [Crawl4AI v0.7.4 – The Intelligent Table Extraction & Performance Update](../blog/release-v0.7.4.md)
|
### [Crawl4AI v0.7.4 – The Intelligent Table Extraction & Performance Update](../blog/release-v0.7.4.md)
|
||||||
*August 17, 2025*
|
*August 17, 2025*
|
||||||
|
|
||||||
Crawl4AI v0.7.4 introduces revolutionary LLM-powered table extraction with intelligent chunking, performance improvements for concurrent crawling, enhanced browser management, and critical stability fixes that make Crawl4AI more robust for production workloads.
|
Revolutionary LLM-powered table extraction with intelligent chunking, performance improvements for concurrent crawling, enhanced browser management, and critical stability fixes.
|
||||||
|
|
||||||
Key highlights:
|
|
||||||
- **🚀 LLMTableExtraction**: Revolutionary table extraction with intelligent chunking for massive tables
|
|
||||||
- **⚡ Dispatcher Bug Fix**: Fixed sequential processing issue in arun_many for fast-completing tasks
|
|
||||||
- **🧹 Memory Management Refactor**: Streamlined memory utilities and better resource management
|
|
||||||
- **🔧 Browser Manager Fixes**: Resolved race conditions in concurrent page creation
|
|
||||||
- **🔗 Advanced URL Processing**: Better handling of raw URLs and base tag link resolution
|
|
||||||
|
|
||||||
[Read full release notes →](../blog/release-v0.7.4.md)
|
[Read full release notes →](../blog/release-v0.7.4.md)
|
||||||
|
|
||||||
|
|||||||
314
docs/md_v2/blog/releases/0.7.6.md
Normal file
314
docs/md_v2/blog/releases/0.7.6.md
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
# Crawl4AI v0.7.6 Release Notes
|
||||||
|
|
||||||
|
*Release Date: October 22, 2025*
|
||||||
|
|
||||||
|
I'm excited to announce Crawl4AI v0.7.6, featuring a complete webhook infrastructure for the Docker job queue API! This release eliminates polling and brings real-time notifications to both crawling and LLM extraction workflows.
|
||||||
|
|
||||||
|
## 🎯 What's New
|
||||||
|
|
||||||
|
### Webhook Support for Docker Job Queue API
|
||||||
|
|
||||||
|
The headline feature of v0.7.6 is comprehensive webhook support for asynchronous job processing. No more constant polling to check if your jobs are done - get instant notifications when they complete!
|
||||||
|
|
||||||
|
**Key Capabilities:**
|
||||||
|
|
||||||
|
- ✅ **Universal Webhook Support**: Both `/crawl/job` and `/llm/job` endpoints now support webhooks
|
||||||
|
- ✅ **Flexible Delivery Modes**: Choose notification-only or include full data in the webhook payload
|
||||||
|
- ✅ **Reliable Delivery**: Exponential backoff retry mechanism (5 attempts: 1s → 2s → 4s → 8s → 16s)
|
||||||
|
- ✅ **Custom Authentication**: Add custom headers for webhook authentication
|
||||||
|
- ✅ **Global Configuration**: Set default webhook URL in `config.yml` for all jobs
|
||||||
|
- ✅ **Task Type Identification**: Distinguish between `crawl` and `llm_extraction` tasks
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
Instead of constantly checking job status:
|
||||||
|
|
||||||
|
**OLD WAY (Polling):**
|
||||||
|
```python
|
||||||
|
# Submit job
|
||||||
|
response = requests.post("http://localhost:11235/crawl/job", json=payload)
|
||||||
|
task_id = response.json()['task_id']
|
||||||
|
|
||||||
|
# Poll until complete
|
||||||
|
while True:
|
||||||
|
status = requests.get(f"http://localhost:11235/crawl/job/{task_id}")
|
||||||
|
if status.json()['status'] == 'completed':
|
||||||
|
break
|
||||||
|
time.sleep(5) # Wait and try again
|
||||||
|
```
|
||||||
|
|
||||||
|
**NEW WAY (Webhooks):**
|
||||||
|
```python
|
||||||
|
# Submit job with webhook
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhook",
|
||||||
|
"webhook_data_in_payload": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response = requests.post("http://localhost:11235/crawl/job", json=payload)
|
||||||
|
|
||||||
|
# Done! Webhook will notify you when complete
|
||||||
|
# Your webhook handler receives the results automatically
|
||||||
|
```
|
||||||
|
|
||||||
|
### Crawl Job Webhooks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/crawl/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"browser_config": {"headless": true},
|
||||||
|
"crawler_config": {"cache_mode": "bypass"},
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/crawl-complete",
|
||||||
|
"webhook_data_in_payload": false,
|
||||||
|
"webhook_headers": {
|
||||||
|
"X-Webhook-Secret": "your-secret-token"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### LLM Extraction Job Webhooks (NEW!)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:11235/llm/job \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/article",
|
||||||
|
"q": "Extract the article title, author, and publication date",
|
||||||
|
"schema": "{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"}}}",
|
||||||
|
"provider": "openai/gpt-4o-mini",
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhooks/llm-complete",
|
||||||
|
"webhook_data_in_payload": true
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Webhook Payload Structure
|
||||||
|
|
||||||
|
**Success (with data):**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "llm_1698765432",
|
||||||
|
"task_type": "llm_extraction",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-22T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com/article"],
|
||||||
|
"data": {
|
||||||
|
"extracted_content": {
|
||||||
|
"title": "Understanding Web Scraping",
|
||||||
|
"author": "John Doe",
|
||||||
|
"date": "2025-10-22"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Failure:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"task_id": "crawl_abc123",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "failed",
|
||||||
|
"timestamp": "2025-10-22T10:30:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"error": "Connection timeout after 30s"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Simple Webhook Handler Example
|
||||||
|
|
||||||
|
```python
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/webhook', methods=['POST'])
|
||||||
|
def handle_webhook():
|
||||||
|
payload = request.json
|
||||||
|
|
||||||
|
task_id = payload['task_id']
|
||||||
|
task_type = payload['task_type']
|
||||||
|
status = payload['status']
|
||||||
|
|
||||||
|
if status == 'completed':
|
||||||
|
if 'data' in payload:
|
||||||
|
# Process data directly
|
||||||
|
data = payload['data']
|
||||||
|
else:
|
||||||
|
# Fetch from API
|
||||||
|
endpoint = 'crawl' if task_type == 'crawl' else 'llm'
|
||||||
|
response = requests.get(f'http://localhost:11235/{endpoint}/job/{task_id}')
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
# Your business logic here
|
||||||
|
print(f"Job {task_id} completed!")
|
||||||
|
|
||||||
|
elif status == 'failed':
|
||||||
|
error = payload.get('error', 'Unknown error')
|
||||||
|
print(f"Job {task_id} failed: {error}")
|
||||||
|
|
||||||
|
return jsonify({"status": "received"}), 200
|
||||||
|
|
||||||
|
app.run(port=8080)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📊 Performance Improvements
|
||||||
|
|
||||||
|
- **Reduced Server Load**: Eliminates constant polling requests
|
||||||
|
- **Lower Latency**: Instant notification vs. polling interval delay
|
||||||
|
- **Better Resource Usage**: Frees up client connections while jobs run in background
|
||||||
|
- **Scalable Architecture**: Handles high-volume crawling workflows efficiently
|
||||||
|
|
||||||
|
## 🐛 Bug Fixes
|
||||||
|
|
||||||
|
- Fixed webhook configuration serialization for Pydantic HttpUrl fields
|
||||||
|
- Improved error handling in webhook delivery service
|
||||||
|
- Enhanced Redis task storage for webhook config persistence
|
||||||
|
|
||||||
|
## 🌍 Expected Real-World Impact
|
||||||
|
|
||||||
|
### For Web Scraping Workflows
|
||||||
|
- **Reduced Costs**: Fewer API calls = lower bandwidth and server costs
|
||||||
|
- **Better UX**: Instant notifications improve user experience
|
||||||
|
- **Scalability**: Handle hundreds of concurrent jobs without polling overhead
|
||||||
|
|
||||||
|
### For LLM Extraction Pipelines
|
||||||
|
- **Async Processing**: Submit LLM extraction jobs and move on
|
||||||
|
- **Batch Processing**: Queue multiple extractions, get notified as they complete
|
||||||
|
- **Integration**: Easy integration with workflow automation tools (Zapier, n8n, etc.)
|
||||||
|
|
||||||
|
### For Microservices
|
||||||
|
- **Event-Driven**: Perfect for event-driven microservice architectures
|
||||||
|
- **Decoupling**: Decouple job submission from result processing
|
||||||
|
- **Reliability**: Automatic retries ensure webhooks are delivered
|
||||||
|
|
||||||
|
## 🔄 Breaking Changes
|
||||||
|
|
||||||
|
**None!** This release is fully backward compatible.
|
||||||
|
|
||||||
|
- Webhook configuration is optional
|
||||||
|
- Existing code continues to work without modification
|
||||||
|
- Polling is still supported for jobs without webhook config
|
||||||
|
|
||||||
|
## 📚 Documentation
|
||||||
|
|
||||||
|
### New Documentation
|
||||||
|
- **[WEBHOOK_EXAMPLES.md](../deploy/docker/WEBHOOK_EXAMPLES.md)** - Comprehensive webhook usage guide
|
||||||
|
- **[docker_webhook_example.py](../docs/examples/docker_webhook_example.py)** - Working code examples
|
||||||
|
|
||||||
|
### Updated Documentation
|
||||||
|
- **[Docker README](../deploy/docker/README.md)** - Added webhook sections
|
||||||
|
- API documentation with webhook examples
|
||||||
|
|
||||||
|
## 🛠️ Migration Guide
|
||||||
|
|
||||||
|
No migration needed! Webhooks are opt-in:
|
||||||
|
|
||||||
|
1. **To use webhooks**: Add `webhook_config` to your job payload
|
||||||
|
2. **To keep polling**: Continue using your existing code
|
||||||
|
|
||||||
|
### Quick Start
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Just add webhook_config to your existing payload
|
||||||
|
payload = {
|
||||||
|
# Your existing configuration
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"browser_config": {...},
|
||||||
|
"crawler_config": {...},
|
||||||
|
|
||||||
|
# NEW: Add webhook configuration
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "https://myapp.com/webhook",
|
||||||
|
"webhook_data_in_payload": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔧 Configuration
|
||||||
|
|
||||||
|
### Global Webhook Configuration (config.yml)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
webhooks:
|
||||||
|
enabled: true
|
||||||
|
default_url: "https://myapp.com/webhooks/default" # Optional
|
||||||
|
data_in_payload: false
|
||||||
|
retry:
|
||||||
|
max_attempts: 5
|
||||||
|
initial_delay_ms: 1000
|
||||||
|
max_delay_ms: 32000
|
||||||
|
timeout_ms: 30000
|
||||||
|
headers:
|
||||||
|
User-Agent: "Crawl4AI-Webhook/1.0"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🚀 Upgrade Instructions
|
||||||
|
|
||||||
|
### Docker
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Pull the latest image
|
||||||
|
docker pull unclecode/crawl4ai:0.7.6
|
||||||
|
|
||||||
|
# Or use latest tag
|
||||||
|
docker pull unclecode/crawl4ai:latest
|
||||||
|
|
||||||
|
# Run with webhook support
|
||||||
|
docker run -d \
|
||||||
|
-p 11235:11235 \
|
||||||
|
--env-file .llm.env \
|
||||||
|
--name crawl4ai \
|
||||||
|
unclecode/crawl4ai:0.7.6
|
||||||
|
```
|
||||||
|
|
||||||
|
### Python Package
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install --upgrade crawl4ai
|
||||||
|
```
|
||||||
|
|
||||||
|
## 💡 Pro Tips
|
||||||
|
|
||||||
|
1. **Use notification-only mode** for large results - fetch data separately to avoid large webhook payloads
|
||||||
|
2. **Set custom headers** for webhook authentication and request tracking
|
||||||
|
3. **Configure global default webhook** for consistent handling across all jobs
|
||||||
|
4. **Implement idempotent webhook handlers** - the same webhook may be delivered multiple times on retry
|
||||||
|
5. **Use structured schemas** with LLM extraction for predictable webhook data
|
||||||
|
|
||||||
|
## 🎬 Demo
|
||||||
|
|
||||||
|
Try the release demo:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python docs/releases_review/demo_v0.7.6.py
|
||||||
|
```
|
||||||
|
|
||||||
|
This comprehensive demo showcases:
|
||||||
|
- Crawl job webhooks (notification-only and with data)
|
||||||
|
- LLM extraction webhooks (with JSON schema support)
|
||||||
|
- Custom headers for authentication
|
||||||
|
- Webhook retry mechanism
|
||||||
|
- Real-time webhook receiver
|
||||||
|
|
||||||
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
|
Thank you to the community for the feedback that shaped this feature! Special thanks to everyone who requested webhook support for asynchronous job processing.
|
||||||
|
|
||||||
|
## 📞 Support
|
||||||
|
|
||||||
|
- **Documentation**: https://docs.crawl4ai.com
|
||||||
|
- **GitHub Issues**: https://github.com/unclecode/crawl4ai/issues
|
||||||
|
- **Discord**: https://discord.gg/crawl4ai
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Happy crawling with webhooks!** 🕷️🪝
|
||||||
|
|
||||||
|
*- unclecode*
|
||||||
318
docs/md_v2/blog/releases/v0.7.5.md
Normal file
318
docs/md_v2/blog/releases/v0.7.5.md
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
# 🚀 Crawl4AI v0.7.5: The Docker Hooks & Security Update
|
||||||
|
|
||||||
|
*September 29, 2025 • 8 min read*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Today I'm releasing Crawl4AI v0.7.5—focused on extensibility and security. This update introduces the Docker Hooks System for pipeline customization, enhanced LLM integration, and important security improvements.
|
||||||
|
|
||||||
|
## 🎯 What's New at a Glance
|
||||||
|
|
||||||
|
- **Docker Hooks System**: Custom Python functions at key pipeline points with function-based API
|
||||||
|
- **Function-Based Hooks**: New `hooks_to_string()` utility with Docker client auto-conversion
|
||||||
|
- **Enhanced LLM Integration**: Custom providers with temperature control
|
||||||
|
- **HTTPS Preservation**: Secure internal link handling
|
||||||
|
- **Bug Fixes**: Resolved multiple community-reported issues
|
||||||
|
- **Improved Docker Error Handling**: Better debugging and reliability
|
||||||
|
|
||||||
|
## 🔧 Docker Hooks System: Pipeline Customization
|
||||||
|
|
||||||
|
Every scraping project needs custom logic—authentication, performance optimization, content processing. Traditional solutions require forking or complex workarounds. Docker Hooks let you inject custom Python functions at 8 key points in the crawling pipeline.
|
||||||
|
|
||||||
|
### Real Example: Authentication & Performance
|
||||||
|
|
||||||
|
```python
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Real working hooks for httpbin.org
|
||||||
|
hooks_config = {
|
||||||
|
"on_page_context_created": """
|
||||||
|
async def hook(page, context, **kwargs):
|
||||||
|
print("Hook: Setting up page context")
|
||||||
|
# Block images to speed up crawling
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
print("Hook: Images blocked")
|
||||||
|
return page
|
||||||
|
""",
|
||||||
|
|
||||||
|
"before_retrieve_html": """
|
||||||
|
async def hook(page, context, **kwargs):
|
||||||
|
print("Hook: Before retrieving HTML")
|
||||||
|
# Scroll to bottom to load lazy content
|
||||||
|
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||||
|
await page.wait_for_timeout(1000)
|
||||||
|
print("Hook: Scrolled to bottom")
|
||||||
|
return page
|
||||||
|
""",
|
||||||
|
|
||||||
|
"before_goto": """
|
||||||
|
async def hook(page, context, url, **kwargs):
|
||||||
|
print(f"Hook: About to navigate to {url}")
|
||||||
|
# Add custom headers
|
||||||
|
await page.set_extra_http_headers({
|
||||||
|
'X-Test-Header': 'crawl4ai-hooks-test'
|
||||||
|
})
|
||||||
|
return page
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
|
||||||
|
# Test with Docker API
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://httpbin.org/html"],
|
||||||
|
"hooks": {
|
||||||
|
"code": hooks_config,
|
||||||
|
"timeout": 30
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post("http://localhost:11235/crawl", json=payload)
|
||||||
|
result = response.json()
|
||||||
|
|
||||||
|
if result.get('success'):
|
||||||
|
print("✅ Hooks executed successfully!")
|
||||||
|
print(f"Content length: {len(result.get('markdown', ''))} characters")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Available Hook Points:**
|
||||||
|
- `on_browser_created`: Browser setup
|
||||||
|
- `on_page_context_created`: Page context configuration
|
||||||
|
- `before_goto`: Pre-navigation setup
|
||||||
|
- `after_goto`: Post-navigation processing
|
||||||
|
- `on_user_agent_updated`: User agent changes
|
||||||
|
- `on_execution_started`: Crawl initialization
|
||||||
|
- `before_retrieve_html`: Pre-extraction processing
|
||||||
|
- `before_return_html`: Final HTML processing
|
||||||
|
|
||||||
|
### Function-Based Hooks API
|
||||||
|
|
||||||
|
Writing hooks as strings works, but lacks IDE support and type checking. v0.7.5 introduces a function-based approach with automatic conversion!
|
||||||
|
|
||||||
|
**Option 1: Using the `hooks_to_string()` Utility**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai import hooks_to_string
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Define hooks as regular Python functions (with full IDE support!)
|
||||||
|
async def on_page_context_created(page, context, **kwargs):
|
||||||
|
"""Block images to speed up crawling"""
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
await page.set_viewport_size({"width": 1920, "height": 1080})
|
||||||
|
return page
|
||||||
|
|
||||||
|
async def before_goto(page, context, url, **kwargs):
|
||||||
|
"""Add custom headers"""
|
||||||
|
await page.set_extra_http_headers({
|
||||||
|
'X-Crawl4AI': 'v0.7.5',
|
||||||
|
'X-Custom-Header': 'my-value'
|
||||||
|
})
|
||||||
|
return page
|
||||||
|
|
||||||
|
# Convert functions to strings
|
||||||
|
hooks_code = hooks_to_string({
|
||||||
|
"on_page_context_created": on_page_context_created,
|
||||||
|
"before_goto": before_goto
|
||||||
|
})
|
||||||
|
|
||||||
|
# Use with REST API
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://httpbin.org/html"],
|
||||||
|
"hooks": {"code": hooks_code, "timeout": 30}
|
||||||
|
}
|
||||||
|
response = requests.post("http://localhost:11235/crawl", json=payload)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Option 2: Docker Client with Automatic Conversion (Recommended!)**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
# Define hooks as functions (same as above)
|
||||||
|
async def on_page_context_created(page, context, **kwargs):
|
||||||
|
await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
|
||||||
|
return page
|
||||||
|
|
||||||
|
async def before_retrieve_html(page, context, **kwargs):
|
||||||
|
# Scroll to load lazy content
|
||||||
|
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||||
|
await page.wait_for_timeout(1000)
|
||||||
|
return page
|
||||||
|
|
||||||
|
# Use Docker client - conversion happens automatically!
|
||||||
|
client = Crawl4aiDockerClient(base_url="http://localhost:11235")
|
||||||
|
|
||||||
|
results = await client.crawl(
|
||||||
|
urls=["https://httpbin.org/html"],
|
||||||
|
hooks={
|
||||||
|
"on_page_context_created": on_page_context_created,
|
||||||
|
"before_retrieve_html": before_retrieve_html
|
||||||
|
},
|
||||||
|
hooks_timeout=30
|
||||||
|
)
|
||||||
|
|
||||||
|
if results and results.success:
|
||||||
|
print(f"✅ Hooks executed! HTML length: {len(results.html)}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits of Function-Based Hooks:**
|
||||||
|
- ✅ Full IDE support (autocomplete, syntax highlighting)
|
||||||
|
- ✅ Type checking and linting
|
||||||
|
- ✅ Easier to test and debug
|
||||||
|
- ✅ Reusable across projects
|
||||||
|
- ✅ Automatic conversion in Docker client
|
||||||
|
- ✅ No breaking changes - string hooks still work!
|
||||||
|
|
||||||
|
## 🤖 Enhanced LLM Integration
|
||||||
|
|
||||||
|
Enhanced LLM integration with custom providers, temperature control, and base URL configuration.
|
||||||
|
|
||||||
|
### Multi-Provider Support
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
|
||||||
|
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||||
|
|
||||||
|
# Test with different providers
|
||||||
|
async def test_llm_providers():
|
||||||
|
# Gemini provider with custom temperature
|
||||||
|
openai_strategy = LLMExtractionStrategy(
|
||||||
|
provider="gemini/gemini-2.5-flash-lite",
|
||||||
|
api_token="your-api-token",
|
||||||
|
temperature=0.7, # New in v0.7.5
|
||||||
|
instruction="Summarize this page in one sentence"
|
||||||
|
)
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
"https://example.com",
|
||||||
|
config=CrawlerRunConfig(extraction_strategy=openai_strategy)
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.success:
|
||||||
|
print("✅ LLM extraction completed")
|
||||||
|
print(result.extracted_content)
|
||||||
|
|
||||||
|
# Docker API with enhanced LLM config
|
||||||
|
llm_payload = {
|
||||||
|
"url": "https://example.com",
|
||||||
|
"f": "llm",
|
||||||
|
"q": "Summarize this page in one sentence.",
|
||||||
|
"provider": "gemini/gemini-2.5-flash-lite",
|
||||||
|
"temperature": 0.7
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post("http://localhost:11235/md", json=llm_payload)
|
||||||
|
```
|
||||||
|
|
||||||
|
**New Features:**
|
||||||
|
- Custom `temperature` parameter for creativity control
|
||||||
|
- `base_url` for custom API endpoints
|
||||||
|
- Multi-provider environment variable support
|
||||||
|
- Docker API integration
|
||||||
|
|
||||||
|
## 🔒 HTTPS Preservation
|
||||||
|
|
||||||
|
**The Problem:** Modern web apps require HTTPS everywhere. When crawlers downgrade internal links from HTTPS to HTTP, authentication breaks and security warnings appear.
|
||||||
|
|
||||||
|
**Solution:** HTTPS preservation maintains secure protocols throughout crawling.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, FilterChain, URLPatternFilter, BFSDeepCrawlStrategy
|
||||||
|
|
||||||
|
async def test_https_preservation():
|
||||||
|
# Enable HTTPS preservation
|
||||||
|
url_filter = URLPatternFilter(
|
||||||
|
patterns=[r"^(https:\/\/)?quotes\.toscrape\.com(\/.*)?$"]
|
||||||
|
)
|
||||||
|
|
||||||
|
config = CrawlerRunConfig(
|
||||||
|
exclude_external_links=True,
|
||||||
|
preserve_https_for_internal_links=True, # New in v0.7.5
|
||||||
|
deep_crawl_strategy=BFSDeepCrawlStrategy(
|
||||||
|
max_depth=2,
|
||||||
|
max_pages=5,
|
||||||
|
filter_chain=FilterChain([url_filter])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
async for result in await crawler.arun(
|
||||||
|
url="https://quotes.toscrape.com",
|
||||||
|
config=config
|
||||||
|
):
|
||||||
|
# All internal links maintain HTTPS
|
||||||
|
internal_links = [link['href'] for link in result.links['internal']]
|
||||||
|
https_links = [link for link in internal_links if link.startswith('https://')]
|
||||||
|
|
||||||
|
print(f"HTTPS links preserved: {len(https_links)}/{len(internal_links)}")
|
||||||
|
for link in https_links[:3]:
|
||||||
|
print(f" → {link}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🛠️ Bug Fixes and Improvements
|
||||||
|
|
||||||
|
### Major Fixes
|
||||||
|
- **URL Processing**: Fixed '+' sign preservation in query parameters (#1332)
|
||||||
|
- **Proxy Configuration**: Enhanced proxy string parsing (old `proxy` parameter deprecated)
|
||||||
|
- **Docker Error Handling**: Comprehensive error messages with status codes
|
||||||
|
- **Memory Management**: Fixed leaks in long-running sessions
|
||||||
|
- **JWT Authentication**: Fixed Docker JWT validation issues (#1442)
|
||||||
|
- **Playwright Stealth**: Fixed stealth features for Playwright integration (#1481)
|
||||||
|
- **API Configuration**: Fixed config handling to prevent overriding user-provided settings (#1505)
|
||||||
|
- **Docker Filter Serialization**: Resolved JSON encoding errors in deep crawl strategy (#1419)
|
||||||
|
- **LLM Provider Support**: Fixed custom LLM provider integration for adaptive crawler (#1291)
|
||||||
|
- **Performance Issues**: Resolved backoff strategy failures and timeout handling (#989)
|
||||||
|
|
||||||
|
### Community-Reported Issues Fixed
|
||||||
|
This release addresses multiple issues reported by the community through GitHub issues and Discord discussions:
|
||||||
|
- Fixed browser configuration reference errors
|
||||||
|
- Resolved dependency conflicts with cssselect
|
||||||
|
- Improved error messaging for failed authentications
|
||||||
|
- Enhanced compatibility with various proxy configurations
|
||||||
|
- Fixed edge cases in URL normalization
|
||||||
|
|
||||||
|
### Configuration Updates
|
||||||
|
```python
|
||||||
|
# Old proxy config (deprecated)
|
||||||
|
# browser_config = BrowserConfig(proxy="http://proxy:8080")
|
||||||
|
|
||||||
|
# New enhanced proxy config
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
proxy_config={
|
||||||
|
"server": "http://proxy:8080",
|
||||||
|
"username": "optional-user",
|
||||||
|
"password": "optional-pass"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔄 Breaking Changes
|
||||||
|
|
||||||
|
1. **Python 3.10+ Required**: Upgrade from Python 3.9
|
||||||
|
2. **Proxy Parameter Deprecated**: Use new `proxy_config` structure
|
||||||
|
3. **New Dependency**: Added `cssselect` for better CSS handling
|
||||||
|
|
||||||
|
## 🚀 Get Started
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install latest version
|
||||||
|
pip install crawl4ai==0.7.5
|
||||||
|
|
||||||
|
# Docker deployment
|
||||||
|
docker pull unclecode/crawl4ai:latest
|
||||||
|
docker run -p 11235:11235 unclecode/crawl4ai:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
**Try the Demo:**
|
||||||
|
```bash
|
||||||
|
# Run working examples
|
||||||
|
python docs/releases_review/demo_v0.7.5.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**Resources:**
|
||||||
|
- 📖 Documentation: [docs.crawl4ai.com](https://docs.crawl4ai.com)
|
||||||
|
- 🐙 GitHub: [github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)
|
||||||
|
- 💬 Discord: [discord.gg/crawl4ai](https://discord.gg/jP8KfhDhyN)
|
||||||
|
- 🐦 Twitter: [@unclecode](https://x.com/unclecode)
|
||||||
|
|
||||||
|
Happy crawling! 🕷️
|
||||||
1371
docs/md_v2/branding/index.md
Normal file
1371
docs/md_v2/branding/index.md
Normal file
File diff suppressed because it is too large
Load Diff
5196
docs/md_v2/complete-sdk-reference.md
Normal file
5196
docs/md_v2/complete-sdk-reference.md
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -59,6 +59,27 @@ Crawl4AI is the #1 trending GitHub repository, actively maintained by a vibrant
|
|||||||
|
|
||||||
> **Note**: If you're looking for the old documentation, you can access it [here](https://old.docs.crawl4ai.com).
|
> **Note**: If you're looking for the old documentation, you can access it [here](https://old.docs.crawl4ai.com).
|
||||||
|
|
||||||
|
## 🆕 AI Assistant Skill Now Available!
|
||||||
|
|
||||||
|
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
|
||||||
|
<h3 style="color: white; margin: 0 0 10px 0;">🤖 Crawl4AI Skill for Claude & AI Assistants</h3>
|
||||||
|
<p style="color: white; margin: 10px 0;">Supercharge your AI coding assistant with complete Crawl4AI knowledge! Download our comprehensive skill package that includes:</p>
|
||||||
|
<ul style="color: white; margin: 10px 0;">
|
||||||
|
<li>📚 Complete SDK reference (23K+ words)</li>
|
||||||
|
<li>🚀 Ready-to-use extraction scripts</li>
|
||||||
|
<li>⚡ Schema generation for efficient scraping</li>
|
||||||
|
<li>🔧 Version 0.7.4 compatible</li>
|
||||||
|
</ul>
|
||||||
|
<div style="text-align: center; margin-top: 15px;">
|
||||||
|
<a href="assets/crawl4ai-skill.zip" download style="background: white; color: #667eea; padding: 12px 30px; border-radius: 5px; text-decoration: none; font-weight: bold; display: inline-block; transition: transform 0.2s;">
|
||||||
|
📦 Download Skill Package
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<p style="color: white; margin: 15px 0 0 0; font-size: 0.9em; text-align: center;">
|
||||||
|
Works with Claude, Cursor, Windsurf, and other AI coding assistants. Import the .zip file into your AI assistant's skill/knowledge system.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
## 🎯 New: Adaptive Web Crawling
|
## 🎯 New: Adaptive Web Crawling
|
||||||
|
|
||||||
Crawl4AI now features intelligent adaptive crawling that knows when to stop! Using advanced information foraging algorithms, it determines when sufficient information has been gathered to answer your query.
|
Crawl4AI now features intelligent adaptive crawling that knows when to stop! Using advanced information foraging algorithms, it determines when sufficient information has been gathered to answer your query.
|
||||||
|
|||||||
66
docs/md_v2/marketplace/README.md
Normal file
66
docs/md_v2/marketplace/README.md
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
# Crawl4AI Marketplace
|
||||||
|
|
||||||
|
A terminal-themed marketplace for tools, integrations, and resources related to Crawl4AI.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### Backend
|
||||||
|
|
||||||
|
1. Install dependencies:
|
||||||
|
```bash
|
||||||
|
cd backend
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Generate dummy data:
|
||||||
|
```bash
|
||||||
|
python dummy_data.py
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Run the server:
|
||||||
|
```bash
|
||||||
|
python server.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The API will be available at http://localhost:8100
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
|
||||||
|
1. Open `frontend/index.html` in your browser
|
||||||
|
2. Or serve via MkDocs as part of the documentation site
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
The marketplace uses SQLite with automatic migration from `schema.yaml`. Tables include:
|
||||||
|
- **apps**: Tools and integrations
|
||||||
|
- **articles**: Reviews, tutorials, and news
|
||||||
|
- **categories**: App categories
|
||||||
|
- **sponsors**: Sponsored content
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
- `GET /api/apps` - List apps with filters
|
||||||
|
- `GET /api/articles` - List articles
|
||||||
|
- `GET /api/categories` - Get all categories
|
||||||
|
- `GET /api/sponsors` - Get active sponsors
|
||||||
|
- `GET /api/search?q=query` - Search across content
|
||||||
|
- `GET /api/stats` - Marketplace statistics
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Smart caching**: LocalStorage with TTL (1 hour)
|
||||||
|
- **Terminal theme**: Consistent with Crawl4AI branding
|
||||||
|
- **Responsive design**: Works on all devices
|
||||||
|
- **Fast search**: Debounced with 300ms delay
|
||||||
|
- **CORS protected**: Only requests from crawl4ai.com and localhost origins are allowed
|
||||||
|
|
||||||
|
## Admin Panel
|
||||||
|
|
||||||
|
Coming soon. For now, edit the SQLite database directly, or modify `dummy_data.py` and run it again to regenerate the data.
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
For production deployment on EC2:
|
||||||
|
1. Update `API_BASE` in `marketplace.js` to production URL
|
||||||
|
2. Run FastAPI with proper production settings (use gunicorn/uvicorn)
|
||||||
|
3. Set up nginx proxy if needed
|
||||||
759
docs/md_v2/marketplace/admin/admin.css
Normal file
759
docs/md_v2/marketplace/admin/admin.css
Normal file
@@ -0,0 +1,759 @@
|
|||||||
|
/* Admin Dashboard - C4AI Terminal Style */
|
||||||
|
|
||||||
|
/* Utility Classes */
|
||||||
|
.hidden {
|
||||||
|
display: none !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Brand Colors */
|
||||||
|
:root {
|
||||||
|
--c4ai-cyan: #50ffff;
|
||||||
|
--c4ai-green: #50ff50;
|
||||||
|
--c4ai-yellow: #ffff50;
|
||||||
|
--c4ai-pink: #ff50ff;
|
||||||
|
--c4ai-blue: #5050ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.admin-container {
|
||||||
|
min-height: 100vh;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Login Screen */
|
||||||
|
.login-screen {
|
||||||
|
min-height: 100vh;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
background: linear-gradient(135deg, #070708 0%, #1a1a2e 100%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-box {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 2px solid var(--primary-cyan);
|
||||||
|
padding: 3rem;
|
||||||
|
width: 400px;
|
||||||
|
box-shadow: 0 0 40px rgba(80, 255, 255, 0.2);
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-logo {
|
||||||
|
height: 60px;
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
filter: brightness(1.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-box h1 {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-size: 1.5rem;
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
#login-form input {
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.75rem;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
font-family: inherit;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
#login-form input:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
#login-form button {
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.75rem;
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
border: none;
|
||||||
|
color: var(--bg-dark);
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
#login-form button:hover {
|
||||||
|
box-shadow: 0 4px 15px rgba(80, 255, 255, 0.3);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.error-msg {
|
||||||
|
color: var(--error);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
margin-top: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Admin Dashboard */
|
||||||
|
.admin-dashboard.hidden {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.admin-header {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-bottom: 2px solid var(--primary-cyan);
|
||||||
|
padding: 1rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-content {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-left {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-logo {
|
||||||
|
height: 35px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.admin-header h1 {
|
||||||
|
font-size: 1.25rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-right {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.admin-user {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.logout-btn {
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--error);
|
||||||
|
color: var(--error);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.logout-btn:hover {
|
||||||
|
background: rgba(255, 60, 116, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Layout */
|
||||||
|
.admin-layout {
|
||||||
|
display: flex;
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
min-height: calc(100vh - 60px);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sidebar */
|
||||||
|
.admin-sidebar {
|
||||||
|
width: 250px;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-right: 1px solid var(--border-color);
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
justify-content: space-between;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-nav {
|
||||||
|
padding: 1rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn {
|
||||||
|
width: 100%;
|
||||||
|
padding: 1rem 1.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: none;
|
||||||
|
border-left: 3px solid transparent;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
text-align: left;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn:hover {
|
||||||
|
background: rgba(80, 255, 255, 0.05);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn.active {
|
||||||
|
border-left-color: var(--primary-cyan);
|
||||||
|
background: rgba(80, 255, 255, 0.1);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-icon {
|
||||||
|
font-size: 1.25rem;
|
||||||
|
margin-right: 0.25rem;
|
||||||
|
display: inline-block;
|
||||||
|
width: 1.5rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn[data-section="stats"] .nav-icon {
|
||||||
|
color: var(--c4ai-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn[data-section="apps"] .nav-icon {
|
||||||
|
color: var(--c4ai-green);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn[data-section="articles"] .nav-icon {
|
||||||
|
color: var(--c4ai-yellow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn[data-section="categories"] .nav-icon {
|
||||||
|
color: var(--c4ai-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn[data-section="sponsors"] .nav-icon {
|
||||||
|
color: var(--c4ai-blue);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-actions {
|
||||||
|
padding: 1rem;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn {
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.75rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Main Content */
|
||||||
|
.admin-main {
|
||||||
|
flex: 1;
|
||||||
|
padding: 2rem;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.content-section {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.content-section.active {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stats Grid */
|
||||||
|
.stats-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1.5rem;
|
||||||
|
margin-bottom: 3rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-card {
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.03), rgba(243, 128, 245, 0.02));
|
||||||
|
border: 1px solid rgba(80, 255, 255, 0.3);
|
||||||
|
padding: 1.5rem;
|
||||||
|
display: flex;
|
||||||
|
gap: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-icon {
|
||||||
|
font-size: 2rem;
|
||||||
|
width: 3rem;
|
||||||
|
height: 3rem;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
border: 2px solid;
|
||||||
|
border-radius: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-card:nth-child(1) .stat-icon {
|
||||||
|
color: var(--c4ai-cyan);
|
||||||
|
border-color: var(--c4ai-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-card:nth-child(2) .stat-icon {
|
||||||
|
color: var(--c4ai-green);
|
||||||
|
border-color: var(--c4ai-green);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-card:nth-child(3) .stat-icon {
|
||||||
|
color: var(--c4ai-yellow);
|
||||||
|
border-color: var(--c4ai-yellow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-card:nth-child(4) .stat-icon {
|
||||||
|
color: var(--c4ai-pink);
|
||||||
|
border-color: var(--c4ai-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-number {
|
||||||
|
font-size: 2rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-label {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-detail {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-top: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Quick Actions */
|
||||||
|
.quick-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.quick-btn {
|
||||||
|
padding: 0.75rem 1.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.quick-btn:hover {
|
||||||
|
background: rgba(80, 255, 255, 0.1);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Section Headers */
|
||||||
|
.section-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-header h2 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-input {
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
width: 250px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-input:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-select {
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.add-btn {
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
border: none;
|
||||||
|
color: var(--bg-dark);
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.add-btn:hover {
|
||||||
|
box-shadow: 0 4px 15px rgba(80, 255, 255, 0.3);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Data Tables */
|
||||||
|
.data-table {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.data-table table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
|
||||||
|
.data-table th {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
padding: 1rem;
|
||||||
|
text-align: left;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
position: sticky;
|
||||||
|
top: 0;
|
||||||
|
z-index: 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
.data-table td {
|
||||||
|
padding: 1rem;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.data-table tr:hover {
|
||||||
|
background: rgba(80, 255, 255, 0.03);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Table Actions */
|
||||||
|
.table-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.table-logo {
|
||||||
|
width: 48px;
|
||||||
|
height: 48px;
|
||||||
|
object-fit: contain;
|
||||||
|
border-radius: 6px;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-edit, .btn-delete, .btn-duplicate {
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-edit:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-delete:hover {
|
||||||
|
border-color: var(--error);
|
||||||
|
color: var(--error);
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-duplicate:hover {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Badges in Tables */
|
||||||
|
.badge {
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge.featured {
|
||||||
|
background: var(--primary-cyan);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge.sponsored {
|
||||||
|
background: var(--warning);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge.active {
|
||||||
|
background: var(--success);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Modal Enhancements */
|
||||||
|
.modal-content.large {
|
||||||
|
max-width: 1000px;
|
||||||
|
width: 90%;
|
||||||
|
max-height: 90vh;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
padding: 1.5rem;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-body {
|
||||||
|
padding: 1.5rem;
|
||||||
|
overflow-y: auto;
|
||||||
|
max-height: calc(90vh - 140px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-footer {
|
||||||
|
display: flex;
|
||||||
|
justify-content: flex-end;
|
||||||
|
gap: 1rem;
|
||||||
|
padding: 1rem 1.5rem;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-cancel, .btn-save {
|
||||||
|
padding: 0.5rem 1.5rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-cancel {
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-cancel:hover {
|
||||||
|
border-color: var(--error);
|
||||||
|
color: var(--error);
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-save {
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
border: none;
|
||||||
|
color: var(--bg-dark);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-save:hover {
|
||||||
|
box-shadow: 0 4px 15px rgba(80, 255, 255, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Form Styles */
|
||||||
|
.form-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||||
|
gap: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-group {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-group label {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-group input,
|
||||||
|
.form-group select,
|
||||||
|
.form-group textarea {
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
font-family: inherit;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-group input:focus,
|
||||||
|
.form-group select:focus,
|
||||||
|
.form-group textarea:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-group.full-width {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.checkbox-group {
|
||||||
|
display: flex;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.checkbox-label {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-form {
|
||||||
|
grid-template-columns: 200px repeat(2, minmax(220px, 1fr));
|
||||||
|
align-items: flex-start;
|
||||||
|
grid-auto-flow: dense;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-logo-group {
|
||||||
|
grid-row: span 3;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.span-two {
|
||||||
|
grid-column: span 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
.logo-upload {
|
||||||
|
position: relative;
|
||||||
|
width: 180px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.image-preview {
|
||||||
|
width: 180px;
|
||||||
|
height: 180px;
|
||||||
|
border: 1px dashed var(--border-color);
|
||||||
|
border-radius: 12px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.image-preview.empty {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
text-align: center;
|
||||||
|
padding: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.image-preview img {
|
||||||
|
max-width: 100%;
|
||||||
|
max-height: 100%;
|
||||||
|
object-fit: contain;
|
||||||
|
}
|
||||||
|
|
||||||
|
.upload-btn {
|
||||||
|
position: absolute;
|
||||||
|
left: 50%;
|
||||||
|
bottom: 12px;
|
||||||
|
transform: translateX(-50%);
|
||||||
|
padding: 0.35rem 1rem;
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
border: none;
|
||||||
|
border-radius: 999px;
|
||||||
|
color: var(--bg-dark);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
box-shadow: 0 6px 18px rgba(80, 255, 255, 0.25);
|
||||||
|
}
|
||||||
|
|
||||||
|
.upload-btn:hover {
|
||||||
|
box-shadow: 0 8px 22px rgba(80, 255, 255, 0.35);
|
||||||
|
}
|
||||||
|
|
||||||
|
.logo-upload input[type="file"] {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.upload-hint {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 960px) {
|
||||||
|
.sponsor-form {
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-logo-group {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
grid-row: auto;
|
||||||
|
flex-direction: row;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.logo-upload {
|
||||||
|
width: 160px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.span-two {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Rich Text Editor */
|
||||||
|
.editor-toolbar {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.editor-btn {
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.editor-btn:hover {
|
||||||
|
background: rgba(80, 255, 255, 0.1);
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.editor-content {
|
||||||
|
min-height: 300px;
|
||||||
|
padding: 1rem;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive */
|
||||||
|
@media (max-width: 1024px) {
|
||||||
|
.admin-layout {
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.admin-sidebar {
|
||||||
|
width: 100%;
|
||||||
|
border-right: none;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-nav {
|
||||||
|
display: flex;
|
||||||
|
overflow-x: auto;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn {
|
||||||
|
border-left: none;
|
||||||
|
border-bottom: 3px solid transparent;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-btn.active {
|
||||||
|
border-bottom-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-actions {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
}
|
||||||
933
docs/md_v2/marketplace/admin/admin.js
Normal file
933
docs/md_v2/marketplace/admin/admin.js
Normal file
@@ -0,0 +1,933 @@
|
|||||||
|
// Admin Dashboard - Smart & Powerful
|
||||||
|
// Resolve where the marketplace API lives.
//
// Resolution order:
//   1. the `?api_origin=` query parameter (persisted to localStorage when it
//      differs from the stored value),
//   2. a previously persisted override,
//   3. `<protocol>//127.0.0.1:8100` when the page itself is served from a
//      local host on any port other than 8100,
//   4. otherwise same-origin relative paths.
//
// SECURITY NOTE(review): the override decides where the admin password and
// bearer token are sent. It is validated as an absolute http(s) URL here, but
// the host is NOT restricted -- a crafted link can still point the dashboard
// at a third-party server. Consider an explicit host allowlist.
const { API_BASE, API_ORIGIN } = (() => {
    const STORAGE_KEY = 'marketplace_api_origin';

    // Trim, drop every trailing slash, and reject anything that is not an
    // absolute http(s) URL (returns '' for missing or invalid values).
    const cleanOrigin = (value) => {
        if (!value) return '';
        const trimmed = String(value).trim().replace(/\/+$/, '');
        return /^https?:\/\/[^\s/]/i.test(trimmed) ? trimmed : '';
    };

    const params = new URLSearchParams(window.location.search);
    const overrideParam = cleanOrigin(params.get('api_origin'));

    let storedOverride = '';
    try {
        storedOverride = cleanOrigin(localStorage.getItem(STORAGE_KEY));
    } catch (error) {
        // Storage may be unavailable (private mode, etc.); act as if unset.
        storedOverride = '';
    }

    let origin = overrideParam || storedOverride;

    // Remember a new, valid override for later visits.
    if (overrideParam && overrideParam !== storedOverride) {
        try {
            localStorage.setItem(STORAGE_KEY, overrideParam);
        } catch (error) {
            // ignore storage errors (private mode, etc.)
        }
    }

    const { protocol, hostname, port } = window.location;
    const isLocalHost = ['localhost', '127.0.0.1', '0.0.0.0'].includes(hostname);

    // Local page not served on port 8100: default to a local API on 8100.
    if (!origin && isLocalHost && port !== '8100') {
        origin = `${protocol}//127.0.0.1:8100`;
    }

    if (origin) {
        const normalized = cleanOrigin(origin);
        if (normalized) {
            return { API_BASE: `${normalized}/marketplace/api`, API_ORIGIN: normalized };
        }
    }

    return { API_BASE: '/marketplace/api', API_ORIGIN: '' };
})();
|
||||||
|
|
||||||
|
// Turn an asset path returned by the API into a URL the browser can load.
//
// - falsy input            -> ''
// - absolute http(s) URL   -> returned unchanged
// - protocol-relative URL  -> returned unchanged (`//host/...` already names
//                             its own host; prefixing API_ORIGIN would
//                             produce a broken `origin//host/...` URL)
// - root-relative path     -> prefixed with API_ORIGIN when one is configured
// - anything else          -> returned unchanged
const resolveAssetUrl = (path) => {
    if (!path) return '';
    if (/^(?:https?:)?\/\//i.test(path)) return path;
    if (path.startsWith('/') && API_ORIGIN) {
        return `${API_ORIGIN}${path}`;
    }
    return path;
};
|
||||||
|
|
||||||
|
class AdminDashboard {
|
||||||
|
constructor() {
|
||||||
|
this.token = localStorage.getItem('admin_token');
|
||||||
|
this.currentSection = 'stats';
|
||||||
|
this.data = {
|
||||||
|
apps: [],
|
||||||
|
articles: [],
|
||||||
|
categories: [],
|
||||||
|
sponsors: []
|
||||||
|
};
|
||||||
|
this.editingItem = null;
|
||||||
|
this.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
async init() {
|
||||||
|
// Check auth
|
||||||
|
if (!this.token) {
|
||||||
|
this.showLogin();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to load stats to verify token
|
||||||
|
try {
|
||||||
|
await this.loadStats();
|
||||||
|
this.showDashboard();
|
||||||
|
this.setupEventListeners();
|
||||||
|
await this.loadAllData();
|
||||||
|
} catch (error) {
|
||||||
|
if (error.status === 401) {
|
||||||
|
this.showLogin();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
showLogin() {
|
||||||
|
document.getElementById('login-screen').classList.remove('hidden');
|
||||||
|
document.getElementById('admin-dashboard').classList.add('hidden');
|
||||||
|
|
||||||
|
// Set up login button click handler
|
||||||
|
const loginBtn = document.getElementById('login-btn');
|
||||||
|
if (loginBtn) {
|
||||||
|
loginBtn.onclick = async () => {
|
||||||
|
const password = document.getElementById('password').value;
|
||||||
|
await this.login(password);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async login(password) {
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${API_BASE}/admin/login`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ password })
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) throw new Error('Invalid password');
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
this.token = data.token;
|
||||||
|
localStorage.setItem('admin_token', this.token);
|
||||||
|
|
||||||
|
document.getElementById('login-screen').classList.add('hidden');
|
||||||
|
this.showDashboard();
|
||||||
|
this.setupEventListeners();
|
||||||
|
await this.loadAllData();
|
||||||
|
} catch (error) {
|
||||||
|
document.getElementById('login-error').textContent = 'Invalid password';
|
||||||
|
document.getElementById('password').value = '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
showDashboard() {
|
||||||
|
document.getElementById('login-screen').classList.add('hidden');
|
||||||
|
document.getElementById('admin-dashboard').classList.remove('hidden');
|
||||||
|
}
|
||||||
|
|
||||||
|
setupEventListeners() {
|
||||||
|
// Navigation
|
||||||
|
document.querySelectorAll('.nav-btn').forEach(btn => {
|
||||||
|
btn.onclick = () => this.switchSection(btn.dataset.section);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Logout
|
||||||
|
document.getElementById('logout-btn').onclick = () => this.logout();
|
||||||
|
|
||||||
|
// Export/Backup
|
||||||
|
document.getElementById('export-btn').onclick = () => this.exportData();
|
||||||
|
document.getElementById('backup-btn').onclick = () => this.backupDatabase();
|
||||||
|
|
||||||
|
// Search
|
||||||
|
['apps', 'articles'].forEach(type => {
|
||||||
|
const searchInput = document.getElementById(`${type}-search`);
|
||||||
|
if (searchInput) {
|
||||||
|
searchInput.oninput = (e) => this.filterTable(type, e.target.value);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Category filter
|
||||||
|
const categoryFilter = document.getElementById('apps-filter');
|
||||||
|
if (categoryFilter) {
|
||||||
|
categoryFilter.onchange = (e) => this.filterByCategory(e.target.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save button in modal
|
||||||
|
document.getElementById('save-btn').onclick = () => this.saveItem();
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadAllData() {
|
||||||
|
try {
|
||||||
|
await this.loadStats();
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load stats:', e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.loadApps();
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load apps:', e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.loadArticles();
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load articles:', e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.loadCategories();
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load categories:', e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.loadSponsors();
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load sponsors:', e);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.populateCategoryFilter();
|
||||||
|
}
|
||||||
|
|
||||||
|
async apiCall(endpoint, options = {}) {
|
||||||
|
const isFormData = options.body instanceof FormData;
|
||||||
|
const headers = {
|
||||||
|
'Authorization': `Bearer ${this.token}`,
|
||||||
|
...options.headers
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!isFormData && !headers['Content-Type']) {
|
||||||
|
headers['Content-Type'] = 'application/json';
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch(`${API_BASE}${endpoint}`, {
|
||||||
|
...options,
|
||||||
|
headers
|
||||||
|
});
|
||||||
|
|
||||||
|
if (response.status === 401) {
|
||||||
|
this.logout();
|
||||||
|
throw { status: 401 };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!response.ok) throw new Error(`API Error: ${response.status}`);
|
||||||
|
return response.json();
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadStats() {
|
||||||
|
const stats = await this.apiCall(`/admin/stats?_=${Date.now()}`, {
|
||||||
|
cache: 'no-store'
|
||||||
|
});
|
||||||
|
|
||||||
|
document.getElementById('stat-apps').textContent = stats.apps.total;
|
||||||
|
document.getElementById('stat-featured').textContent = stats.apps.featured;
|
||||||
|
document.getElementById('stat-sponsored').textContent = stats.apps.sponsored;
|
||||||
|
document.getElementById('stat-articles').textContent = stats.articles;
|
||||||
|
document.getElementById('stat-sponsors').textContent = stats.sponsors.active;
|
||||||
|
document.getElementById('stat-views').textContent = this.formatNumber(stats.total_views);
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadApps() {
|
||||||
|
this.data.apps = await this.apiCall(`/apps?limit=100&_=${Date.now()}`, {
|
||||||
|
cache: 'no-store'
|
||||||
|
});
|
||||||
|
this.renderAppsTable(this.data.apps);
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadArticles() {
|
||||||
|
this.data.articles = await this.apiCall(`/articles?limit=100&_=${Date.now()}`, {
|
||||||
|
cache: 'no-store'
|
||||||
|
});
|
||||||
|
this.renderArticlesTable(this.data.articles);
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadCategories() {
|
||||||
|
const cacheBuster = Date.now();
|
||||||
|
this.data.categories = await this.apiCall(`/categories?_=${cacheBuster}`, {
|
||||||
|
cache: 'no-store'
|
||||||
|
});
|
||||||
|
this.renderCategoriesTable(this.data.categories);
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadSponsors() {
|
||||||
|
const cacheBuster = Date.now();
|
||||||
|
this.data.sponsors = await this.apiCall(`/sponsors?limit=100&_=${cacheBuster}`, {
|
||||||
|
cache: 'no-store'
|
||||||
|
});
|
||||||
|
this.renderSponsorsTable(this.data.sponsors);
|
||||||
|
}
|
||||||
|
|
||||||
|
renderAppsTable(apps) {
|
||||||
|
const table = document.getElementById('apps-table');
|
||||||
|
table.innerHTML = `
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Category</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Rating</th>
|
||||||
|
<th>Downloads</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
${apps.map(app => `
|
||||||
|
<tr>
|
||||||
|
<td>${app.id}</td>
|
||||||
|
<td>${app.name}</td>
|
||||||
|
<td>${app.category}</td>
|
||||||
|
<td>${app.type}</td>
|
||||||
|
<td>◆ ${app.rating}/5</td>
|
||||||
|
<td>${this.formatNumber(app.downloads)}</td>
|
||||||
|
<td>
|
||||||
|
${app.featured ? '<span class="badge featured">Featured</span>' : ''}
|
||||||
|
${app.sponsored ? '<span class="badge sponsored">Sponsored</span>' : ''}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<div class="table-actions">
|
||||||
|
<button class="btn-edit" onclick="admin.editItem('apps', ${app.id})">Edit</button>
|
||||||
|
<button class="btn-duplicate" onclick="admin.duplicateItem('apps', ${app.id})">Duplicate</button>
|
||||||
|
<button class="btn-delete" onclick="admin.deleteItem('apps', ${app.id})">Delete</button>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
`).join('')}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
renderArticlesTable(articles) {
|
||||||
|
const table = document.getElementById('articles-table');
|
||||||
|
table.innerHTML = `
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>Category</th>
|
||||||
|
<th>Author</th>
|
||||||
|
<th>Published</th>
|
||||||
|
<th>Views</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
${articles.map(article => `
|
||||||
|
<tr>
|
||||||
|
<td>${article.id}</td>
|
||||||
|
<td>${article.title}</td>
|
||||||
|
<td>${article.category}</td>
|
||||||
|
<td>${article.author}</td>
|
||||||
|
<td>${new Date(article.published_date).toLocaleDateString()}</td>
|
||||||
|
<td>${this.formatNumber(article.views)}</td>
|
||||||
|
<td>
|
||||||
|
<div class="table-actions">
|
||||||
|
<button class="btn-edit" onclick="admin.editItem('articles', ${article.id})">Edit</button>
|
||||||
|
<button class="btn-duplicate" onclick="admin.duplicateItem('articles', ${article.id})">Duplicate</button>
|
||||||
|
<button class="btn-delete" onclick="admin.deleteItem('articles', ${article.id})">Delete</button>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
`).join('')}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
renderCategoriesTable(categories) {
|
||||||
|
const table = document.getElementById('categories-table');
|
||||||
|
table.innerHTML = `
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Order</th>
|
||||||
|
<th>Icon</th>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Description</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
${categories.map(cat => `
|
||||||
|
<tr>
|
||||||
|
<td>${cat.order_index}</td>
|
||||||
|
<td>${cat.icon}</td>
|
||||||
|
<td>${cat.name}</td>
|
||||||
|
<td>${cat.description}</td>
|
||||||
|
<td>
|
||||||
|
<div class="table-actions">
|
||||||
|
<button class="btn-edit" onclick="admin.editItem('categories', ${cat.id})">Edit</button>
|
||||||
|
<button class="btn-delete" onclick="admin.deleteCategory(${cat.id})">Delete</button>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
`).join('')}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
renderSponsorsTable(sponsors) {
|
||||||
|
const table = document.getElementById('sponsors-table');
|
||||||
|
table.innerHTML = `
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Logo</th>
|
||||||
|
<th>Company</th>
|
||||||
|
<th>Tier</th>
|
||||||
|
<th>Start</th>
|
||||||
|
<th>End</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
${sponsors.map(sponsor => `
|
||||||
|
<tr>
|
||||||
|
<td>${sponsor.id}</td>
|
||||||
|
<td>${sponsor.logo_url ? `<img class="table-logo" src="${resolveAssetUrl(sponsor.logo_url)}" alt="${sponsor.company_name} logo">` : '-'}</td>
|
||||||
|
<td>${sponsor.company_name}</td>
|
||||||
|
<td>${sponsor.tier}</td>
|
||||||
|
<td>${new Date(sponsor.start_date).toLocaleDateString()}</td>
|
||||||
|
<td>${new Date(sponsor.end_date).toLocaleDateString()}</td>
|
||||||
|
<td>${sponsor.active ? '<span class="badge active">Active</span>' : 'Inactive'}</td>
|
||||||
|
<td>
|
||||||
|
<div class="table-actions">
|
||||||
|
<button class="btn-edit" onclick="admin.editItem('sponsors', ${sponsor.id})">Edit</button>
|
||||||
|
<button class="btn-delete" onclick="admin.deleteItem('sponsors', ${sponsor.id})">Delete</button>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
`).join('')}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
showAddForm(type) {
|
||||||
|
this.editingItem = null;
|
||||||
|
this.showModal(type, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
async editItem(type, id) {
|
||||||
|
const item = this.data[type].find(i => i.id === id);
|
||||||
|
if (item) {
|
||||||
|
this.editingItem = item;
|
||||||
|
this.showModal(type, item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async duplicateItem(type, id) {
|
||||||
|
const item = this.data[type].find(i => i.id === id);
|
||||||
|
if (item) {
|
||||||
|
const newItem = { ...item };
|
||||||
|
delete newItem.id;
|
||||||
|
newItem.name = `${newItem.name || newItem.title} (Copy)`;
|
||||||
|
if (newItem.slug) newItem.slug = `${newItem.slug}-copy-${Date.now()}`;
|
||||||
|
|
||||||
|
this.editingItem = null;
|
||||||
|
this.showModal(type, newItem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
showModal(type, item) {
|
||||||
|
const modal = document.getElementById('form-modal');
|
||||||
|
const title = document.getElementById('modal-title');
|
||||||
|
const body = document.getElementById('modal-body');
|
||||||
|
|
||||||
|
title.textContent = item ? `Edit ${type.slice(0, -1)}` : `Add New ${type.slice(0, -1)}`;
|
||||||
|
|
||||||
|
if (type === 'apps') {
|
||||||
|
body.innerHTML = this.getAppForm(item);
|
||||||
|
} else if (type === 'articles') {
|
||||||
|
body.innerHTML = this.getArticleForm(item);
|
||||||
|
} else if (type === 'categories') {
|
||||||
|
body.innerHTML = this.getCategoryForm(item);
|
||||||
|
} else if (type === 'sponsors') {
|
||||||
|
body.innerHTML = this.getSponsorForm(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
modal.classList.remove('hidden');
|
||||||
|
modal.dataset.type = type;
|
||||||
|
|
||||||
|
if (type === 'sponsors') {
|
||||||
|
this.setupLogoUploadHandlers();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
getAppForm(app) {
|
||||||
|
return `
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Name *</label>
|
||||||
|
<input type="text" id="form-name" value="${app?.name || ''}" required>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Slug</label>
|
||||||
|
<input type="text" id="form-slug" value="${app?.slug || ''}" placeholder="auto-generated">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Category</label>
|
||||||
|
<select id="form-category">
|
||||||
|
${this.data.categories.map(cat =>
|
||||||
|
`<option value="${cat.name}" ${app?.category === cat.name ? 'selected' : ''}>${cat.name}</option>`
|
||||||
|
).join('')}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Type</label>
|
||||||
|
<select id="form-type">
|
||||||
|
<option value="Open Source" ${app?.type === 'Open Source' ? 'selected' : ''}>Open Source</option>
|
||||||
|
<option value="Paid" ${app?.type === 'Paid' ? 'selected' : ''}>Paid</option>
|
||||||
|
<option value="Freemium" ${app?.type === 'Freemium' ? 'selected' : ''}>Freemium</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Rating</label>
|
||||||
|
<input type="number" id="form-rating" value="${app?.rating || 4.5}" min="0" max="5" step="0.1">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Downloads</label>
|
||||||
|
<input type="number" id="form-downloads" value="${app?.downloads || 0}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Description</label>
|
||||||
|
<textarea id="form-description" rows="3">${app?.description || ''}</textarea>
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Image URL</label>
|
||||||
|
<input type="text" id="form-image" value="${app?.image || ''}" placeholder="https://...">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Website URL</label>
|
||||||
|
<input type="text" id="form-website" value="${app?.website_url || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>GitHub URL</label>
|
||||||
|
<input type="text" id="form-github" value="${app?.github_url || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Pricing</label>
|
||||||
|
<input type="text" id="form-pricing" value="${app?.pricing || 'Free'}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Contact Email</label>
|
||||||
|
<input type="email" id="form-email" value="${app?.contact_email || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width checkbox-group">
|
||||||
|
<label class="checkbox-label">
|
||||||
|
<input type="checkbox" id="form-featured" ${app?.featured ? 'checked' : ''}>
|
||||||
|
Featured
|
||||||
|
</label>
|
||||||
|
<label class="checkbox-label">
|
||||||
|
<input type="checkbox" id="form-sponsored" ${app?.sponsored ? 'checked' : ''}>
|
||||||
|
Sponsored
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Long Description (Markdown - Overview tab)</label>
|
||||||
|
<textarea id="form-long-description" rows="10" placeholder="Enter detailed description with markdown formatting...">${app?.long_description || ''}</textarea>
|
||||||
|
<small>Markdown support: **bold**, *italic*, [links](url), # headers, code blocks, lists</small>
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Integration Guide (Markdown - Integration tab)</label>
|
||||||
|
<textarea id="form-integration" rows="20" placeholder="Enter integration guide with installation, examples, and code snippets using markdown...">${app?.integration_guide || ''}</textarea>
|
||||||
|
<small>Single markdown field with installation, examples, and complete guide. Code blocks get auto copy buttons.</small>
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Documentation (Markdown - Documentation tab)</label>
|
||||||
|
<textarea id="form-documentation" rows="20" placeholder="Enter documentation with API reference, examples, and best practices using markdown...">${app?.documentation || ''}</textarea>
|
||||||
|
<small>Full documentation with API reference, examples, best practices, etc.</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
getArticleForm(article) {
|
||||||
|
return `
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Title *</label>
|
||||||
|
<input type="text" id="form-title" value="${article?.title || ''}" required>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Author</label>
|
||||||
|
<input type="text" id="form-author" value="${article?.author || 'Crawl4AI Team'}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Category</label>
|
||||||
|
<select id="form-category">
|
||||||
|
<option value="News" ${article?.category === 'News' ? 'selected' : ''}>News</option>
|
||||||
|
<option value="Tutorial" ${article?.category === 'Tutorial' ? 'selected' : ''}>Tutorial</option>
|
||||||
|
<option value="Review" ${article?.category === 'Review' ? 'selected' : ''}>Review</option>
|
||||||
|
<option value="Comparison" ${article?.category === 'Comparison' ? 'selected' : ''}>Comparison</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Featured Image URL</label>
|
||||||
|
<input type="text" id="form-image" value="${article?.featured_image || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Content</label>
|
||||||
|
<textarea id="form-content" rows="20">${article?.content || ''}</textarea>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
getCategoryForm(category) {
|
||||||
|
return `
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Name *</label>
|
||||||
|
<input type="text" id="form-name" value="${category?.name || ''}" required>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Icon</label>
|
||||||
|
<input type="text" id="form-icon" value="${category?.icon || '📁'}" maxlength="2">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Order</label>
|
||||||
|
<input type="number" id="form-order" value="${category?.order_index || 0}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group full-width">
|
||||||
|
<label>Description</label>
|
||||||
|
<textarea id="form-description" rows="3">${category?.description || ''}</textarea>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
getSponsorForm(sponsor) {
|
||||||
|
const existingFile = sponsor?.logo_url ? sponsor.logo_url.split('/').pop().split('?')[0] : '';
|
||||||
|
return `
|
||||||
|
<div class="form-grid sponsor-form">
|
||||||
|
<div class="form-group sponsor-logo-group">
|
||||||
|
<label>Logo</label>
|
||||||
|
<input type="hidden" id="form-logo-url" value="${sponsor?.logo_url || ''}">
|
||||||
|
<div class="logo-upload">
|
||||||
|
<div class="image-preview ${sponsor?.logo_url ? '' : 'empty'}" id="form-logo-preview">
|
||||||
|
${sponsor?.logo_url ? `<img src="${resolveAssetUrl(sponsor.logo_url)}" alt="Logo preview">` : '<span>No logo uploaded</span>'}
|
||||||
|
</div>
|
||||||
|
<button type="button" class="upload-btn" id="form-logo-button">Upload Logo</button>
|
||||||
|
<input type="file" id="form-logo-file" accept="image/png,image/jpeg,image/webp,image/svg+xml" hidden>
|
||||||
|
</div>
|
||||||
|
<p class="upload-hint" id="form-logo-filename">${existingFile ? `Current: ${existingFile}` : 'No file selected'}</p>
|
||||||
|
</div>
|
||||||
|
<div class="form-group span-two">
|
||||||
|
<label>Company Name *</label>
|
||||||
|
<input type="text" id="form-name" value="${sponsor?.company_name || ''}" required>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Tier</label>
|
||||||
|
<select id="form-tier">
|
||||||
|
<option value="Bronze" ${sponsor?.tier === 'Bronze' ? 'selected' : ''}>Bronze</option>
|
||||||
|
<option value="Silver" ${sponsor?.tier === 'Silver' ? 'selected' : ''}>Silver</option>
|
||||||
|
<option value="Gold" ${sponsor?.tier === 'Gold' ? 'selected' : ''}>Gold</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Landing URL</label>
|
||||||
|
<input type="text" id="form-landing" value="${sponsor?.landing_url || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Banner URL</label>
|
||||||
|
<input type="text" id="form-banner" value="${sponsor?.banner_url || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>Start Date</label>
|
||||||
|
<input type="date" id="form-start" value="${sponsor?.start_date?.split('T')[0] || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>End Date</label>
|
||||||
|
<input type="date" id="form-end" value="${sponsor?.end_date?.split('T')[0] || ''}">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label class="checkbox-label">
|
||||||
|
<input type="checkbox" id="form-active" ${sponsor?.active ? 'checked' : ''}>
|
||||||
|
Active
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
async saveItem() {
|
||||||
|
const modal = document.getElementById('form-modal');
|
||||||
|
const type = modal.dataset.type;
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (type === 'sponsors') {
|
||||||
|
const fileInput = document.getElementById('form-logo-file');
|
||||||
|
if (fileInput && fileInput.files && fileInput.files[0]) {
|
||||||
|
const formData = new FormData();
|
||||||
|
formData.append('file', fileInput.files[0]);
|
||||||
|
formData.append('folder', 'sponsors');
|
||||||
|
|
||||||
|
const uploadResponse = await this.apiCall('/admin/upload-image', {
|
||||||
|
method: 'POST',
|
||||||
|
body: formData
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!uploadResponse.url) {
|
||||||
|
throw new Error('Image upload failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('form-logo-url').value = uploadResponse.url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = this.collectFormData(type);
|
||||||
|
|
||||||
|
if (this.editingItem) {
|
||||||
|
await this.apiCall(`/admin/${type}/${this.editingItem.id}`, {
|
||||||
|
method: 'PUT',
|
||||||
|
body: JSON.stringify(data)
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
await this.apiCall(`/admin/${type}`, {
|
||||||
|
method: 'POST',
|
||||||
|
body: JSON.stringify(data)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
this.closeModal();
|
||||||
|
await this[`load${type.charAt(0).toUpperCase() + type.slice(1)}`]();
|
||||||
|
await this.loadStats();
|
||||||
|
} catch (error) {
|
||||||
|
alert('Error saving item: ' + error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
collectFormData(type) {
|
||||||
|
const data = {};
|
||||||
|
|
||||||
|
if (type === 'apps') {
|
||||||
|
data.name = document.getElementById('form-name').value;
|
||||||
|
data.slug = document.getElementById('form-slug').value || this.generateSlug(data.name);
|
||||||
|
data.description = document.getElementById('form-description').value;
|
||||||
|
data.category = document.getElementById('form-category').value;
|
||||||
|
data.type = document.getElementById('form-type').value;
|
||||||
|
const rating = parseFloat(document.getElementById('form-rating').value);
|
||||||
|
const downloads = parseInt(document.getElementById('form-downloads').value, 10);
|
||||||
|
data.rating = Number.isFinite(rating) ? rating : 0;
|
||||||
|
data.downloads = Number.isFinite(downloads) ? downloads : 0;
|
||||||
|
data.image = document.getElementById('form-image').value;
|
||||||
|
data.website_url = document.getElementById('form-website').value;
|
||||||
|
data.github_url = document.getElementById('form-github').value;
|
||||||
|
data.pricing = document.getElementById('form-pricing').value;
|
||||||
|
data.contact_email = document.getElementById('form-email').value;
|
||||||
|
data.featured = document.getElementById('form-featured').checked ? 1 : 0;
|
||||||
|
data.sponsored = document.getElementById('form-sponsored').checked ? 1 : 0;
|
||||||
|
data.long_description = document.getElementById('form-long-description').value;
|
||||||
|
data.integration_guide = document.getElementById('form-integration').value;
|
||||||
|
data.documentation = document.getElementById('form-documentation').value;
|
||||||
|
} else if (type === 'articles') {
|
||||||
|
data.title = document.getElementById('form-title').value;
|
||||||
|
data.slug = this.generateSlug(data.title);
|
||||||
|
data.author = document.getElementById('form-author').value;
|
||||||
|
data.category = document.getElementById('form-category').value;
|
||||||
|
data.featured_image = document.getElementById('form-image').value;
|
||||||
|
data.content = document.getElementById('form-content').value;
|
||||||
|
} else if (type === 'categories') {
|
||||||
|
data.name = document.getElementById('form-name').value;
|
||||||
|
data.slug = this.generateSlug(data.name);
|
||||||
|
data.icon = document.getElementById('form-icon').value;
|
||||||
|
data.description = document.getElementById('form-description').value;
|
||||||
|
const orderIndex = parseInt(document.getElementById('form-order').value, 10);
|
||||||
|
data.order_index = Number.isFinite(orderIndex) ? orderIndex : 0;
|
||||||
|
} else if (type === 'sponsors') {
|
||||||
|
data.company_name = document.getElementById('form-name').value;
|
||||||
|
data.logo_url = document.getElementById('form-logo-url').value;
|
||||||
|
data.tier = document.getElementById('form-tier').value;
|
||||||
|
data.landing_url = document.getElementById('form-landing').value;
|
||||||
|
data.banner_url = document.getElementById('form-banner').value;
|
||||||
|
data.start_date = document.getElementById('form-start').value;
|
||||||
|
data.end_date = document.getElementById('form-end').value;
|
||||||
|
data.active = document.getElementById('form-active').checked ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
setupLogoUploadHandlers() {
|
||||||
|
const fileInput = document.getElementById('form-logo-file');
|
||||||
|
const preview = document.getElementById('form-logo-preview');
|
||||||
|
const logoUrlInput = document.getElementById('form-logo-url');
|
||||||
|
const trigger = document.getElementById('form-logo-button');
|
||||||
|
const fileNameEl = document.getElementById('form-logo-filename');
|
||||||
|
|
||||||
|
if (!fileInput || !preview || !logoUrlInput) return;
|
||||||
|
|
||||||
|
const setFileName = (text) => {
|
||||||
|
if (fileNameEl) {
|
||||||
|
fileNameEl.textContent = text;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const setEmptyState = () => {
|
||||||
|
preview.innerHTML = '<span>No logo uploaded</span>';
|
||||||
|
preview.classList.add('empty');
|
||||||
|
setFileName('No file selected');
|
||||||
|
};
|
||||||
|
|
||||||
|
const setExistingState = () => {
|
||||||
|
if (logoUrlInput.value) {
|
||||||
|
const existingFile = logoUrlInput.value.split('/').pop().split('?')[0];
|
||||||
|
preview.innerHTML = `<img src="${resolveAssetUrl(logoUrlInput.value)}" alt="Logo preview">`;
|
||||||
|
preview.classList.remove('empty');
|
||||||
|
setFileName(existingFile ? `Current: ${existingFile}` : 'Current logo');
|
||||||
|
} else {
|
||||||
|
setEmptyState();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
setExistingState();
|
||||||
|
|
||||||
|
if (trigger) {
|
||||||
|
trigger.onclick = () => fileInput.click();
|
||||||
|
}
|
||||||
|
|
||||||
|
fileInput.addEventListener('change', (event) => {
|
||||||
|
const file = event.target.files && event.target.files[0];
|
||||||
|
|
||||||
|
if (!file) {
|
||||||
|
setExistingState();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setFileName(file.name);
|
||||||
|
|
||||||
|
const reader = new FileReader();
|
||||||
|
reader.onload = () => {
|
||||||
|
preview.innerHTML = `<img src="${reader.result}" alt="Logo preview">`;
|
||||||
|
preview.classList.remove('empty');
|
||||||
|
};
|
||||||
|
reader.readAsDataURL(file);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async deleteItem(type, id) {
|
||||||
|
if (!confirm(`Are you sure you want to delete this ${type.slice(0, -1)}?`)) return;
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.apiCall(`/admin/${type}/${id}`, { method: 'DELETE' });
|
||||||
|
await this[`load${type.charAt(0).toUpperCase() + type.slice(1)}`]();
|
||||||
|
await this.loadStats();
|
||||||
|
} catch (error) {
|
||||||
|
alert('Error deleting item: ' + error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async deleteCategory(id) {
|
||||||
|
const hasApps = this.data.apps.some(app =>
|
||||||
|
app.category === this.data.categories.find(c => c.id === id)?.name
|
||||||
|
);
|
||||||
|
|
||||||
|
if (hasApps) {
|
||||||
|
alert('Cannot delete category with existing apps');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.deleteItem('categories', id);
|
||||||
|
}
|
||||||
|
|
||||||
|
closeModal() {
|
||||||
|
document.getElementById('form-modal').classList.add('hidden');
|
||||||
|
this.editingItem = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
switchSection(section) {
|
||||||
|
// Update navigation
|
||||||
|
document.querySelectorAll('.nav-btn').forEach(btn => {
|
||||||
|
btn.classList.toggle('active', btn.dataset.section === section);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Show section
|
||||||
|
document.querySelectorAll('.content-section').forEach(sec => {
|
||||||
|
sec.classList.remove('active');
|
||||||
|
});
|
||||||
|
document.getElementById(`${section}-section`).classList.add('active');
|
||||||
|
|
||||||
|
this.currentSection = section;
|
||||||
|
}
|
||||||
|
|
||||||
|
filterTable(type, query) {
|
||||||
|
const items = this.data[type].filter(item => {
|
||||||
|
const searchText = Object.values(item).join(' ').toLowerCase();
|
||||||
|
return searchText.includes(query.toLowerCase());
|
||||||
|
});
|
||||||
|
|
||||||
|
if (type === 'apps') {
|
||||||
|
this.renderAppsTable(items);
|
||||||
|
} else if (type === 'articles') {
|
||||||
|
this.renderArticlesTable(items);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
filterByCategory(category) {
|
||||||
|
const apps = category
|
||||||
|
? this.data.apps.filter(app => app.category === category)
|
||||||
|
: this.data.apps;
|
||||||
|
this.renderAppsTable(apps);
|
||||||
|
}
|
||||||
|
|
||||||
|
populateCategoryFilter() {
|
||||||
|
const filter = document.getElementById('apps-filter');
|
||||||
|
if (!filter) return;
|
||||||
|
|
||||||
|
filter.innerHTML = '<option value="">All Categories</option>';
|
||||||
|
this.data.categories.forEach(cat => {
|
||||||
|
filter.innerHTML += `<option value="${cat.name}">${cat.name}</option>`;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async exportData() {
|
||||||
|
const data = {
|
||||||
|
apps: this.data.apps,
|
||||||
|
articles: this.data.articles,
|
||||||
|
categories: this.data.categories,
|
||||||
|
sponsors: this.data.sponsors,
|
||||||
|
exported: new Date().toISOString()
|
||||||
|
};
|
||||||
|
|
||||||
|
const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
|
||||||
|
const url = URL.createObjectURL(blob);
|
||||||
|
const a = document.createElement('a');
|
||||||
|
a.href = url;
|
||||||
|
a.download = `marketplace-export-${Date.now()}.json`;
|
||||||
|
a.click();
|
||||||
|
}
|
||||||
|
|
||||||
|
async backupDatabase() {
|
||||||
|
// In production, this would download the SQLite file
|
||||||
|
alert('Database backup would be implemented on the server side');
|
||||||
|
}
|
||||||
|
|
||||||
|
generateSlug(text) {
|
||||||
|
return text.toLowerCase()
|
||||||
|
.replace(/[^\w\s-]/g, '')
|
||||||
|
.replace(/\s+/g, '-')
|
||||||
|
.replace(/-+/g, '-')
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
formatNumber(num) {
|
||||||
|
if (num >= 1000000) return (num / 1000000).toFixed(1) + 'M';
|
||||||
|
if (num >= 1000) return (num / 1000).toFixed(1) + 'K';
|
||||||
|
return num.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
logout() {
|
||||||
|
localStorage.removeItem('admin_token');
|
||||||
|
this.token = null;
|
||||||
|
this.showLogin();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize
// The instance must be named `admin`: the inline onclick handlers in the
// rendered tables call admin.editItem(...), admin.duplicateItem(...),
// admin.deleteItem(...) and admin.deleteCategory(...).
const admin = new AdminDashboard();
|
||||||
215
docs/md_v2/marketplace/admin/index.html
Normal file
215
docs/md_v2/marketplace/admin/index.html
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en" data-theme="dark">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Admin Dashboard - Crawl4AI Marketplace</title>
|
||||||
|
<link rel="stylesheet" href="../frontend/marketplace.css?v=1759329000">
|
||||||
|
<link rel="stylesheet" href="admin.css?v=1759329000">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="admin-container">
|
||||||
|
<!-- Login Screen -->
|
||||||
|
<div id="login-screen" class="login-screen">
|
||||||
|
<div class="login-box">
|
||||||
|
<img src="../../assets/images/logo.png" alt="Crawl4AI" class="login-logo">
|
||||||
|
<h1>[ Admin Access ]</h1>
|
||||||
|
<div id="login-form">
|
||||||
|
<!-- Enter submits via the login button. Uses keydown because the keypress event is deprecated. -->
<input type="password" id="password" placeholder="Enter admin password" aria-label="Admin password" autocomplete="current-password" autofocus onkeydown="if(event.key==='Enter'){document.getElementById('login-btn').click()}">
|
||||||
|
<button type="button" id="login-btn">→ Login</button>
|
||||||
|
</div>
|
||||||
|
<div id="login-error" class="error-msg"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Admin Dashboard -->
|
||||||
|
<div id="admin-dashboard" class="admin-dashboard hidden">
|
||||||
|
<!-- Header -->
|
||||||
|
<header class="admin-header">
|
||||||
|
<div class="header-content">
|
||||||
|
<div class="header-left">
|
||||||
|
<img src="../../assets/images/logo.png" alt="Crawl4AI" class="header-logo">
|
||||||
|
<h1>[ Admin Dashboard ]</h1>
|
||||||
|
</div>
|
||||||
|
<div class="header-right">
|
||||||
|
<span class="admin-user">Administrator</span>
|
||||||
|
<button id="logout-btn" class="logout-btn">↗ Logout</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<!-- Main Layout -->
|
||||||
|
<div class="admin-layout">
|
||||||
|
<!-- Sidebar -->
|
||||||
|
<aside class="admin-sidebar">
|
||||||
|
<nav class="sidebar-nav">
|
||||||
|
<button class="nav-btn active" data-section="stats">
|
||||||
|
<span class="nav-icon">▓</span> Dashboard
|
||||||
|
</button>
|
||||||
|
<button class="nav-btn" data-section="apps">
|
||||||
|
<span class="nav-icon">◆</span> Apps
|
||||||
|
</button>
|
||||||
|
<button class="nav-btn" data-section="articles">
|
||||||
|
<span class="nav-icon">■</span> Articles
|
||||||
|
</button>
|
||||||
|
<button class="nav-btn" data-section="categories">
|
||||||
|
<span class="nav-icon">□</span> Categories
|
||||||
|
</button>
|
||||||
|
<button class="nav-btn" data-section="sponsors">
|
||||||
|
<span class="nav-icon">◆</span> Sponsors
|
||||||
|
</button>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<div class="sidebar-actions">
|
||||||
|
<button id="export-btn" class="action-btn">
|
||||||
|
<span>↓</span> Export Data
|
||||||
|
</button>
|
||||||
|
<button id="backup-btn" class="action-btn">
|
||||||
|
<span>▪</span> Backup DB
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</aside>
|
||||||
|
|
||||||
|
<!-- Main Content -->
|
||||||
|
<main class="admin-main">
|
||||||
|
<!-- Stats Section -->
|
||||||
|
<section id="stats-section" class="content-section active">
|
||||||
|
<h2>Dashboard Overview</h2>
|
||||||
|
<div class="stats-grid">
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-icon">◆</div>
|
||||||
|
<div class="stat-info">
|
||||||
|
<div class="stat-number" id="stat-apps">--</div>
|
||||||
|
<div class="stat-label">Total Apps</div>
|
||||||
|
<div class="stat-detail">
|
||||||
|
<span id="stat-featured">--</span> featured,
|
||||||
|
<span id="stat-sponsored">--</span> sponsored
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-icon">■</div>
|
||||||
|
<div class="stat-info">
|
||||||
|
<div class="stat-number" id="stat-articles">--</div>
|
||||||
|
<div class="stat-label">Articles</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-icon">◆</div>
|
||||||
|
<div class="stat-info">
|
||||||
|
<div class="stat-number" id="stat-sponsors">--</div>
|
||||||
|
<div class="stat-label">Active Sponsors</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-icon">●</div>
|
||||||
|
<div class="stat-info">
|
||||||
|
<div class="stat-number" id="stat-views">--</div>
|
||||||
|
<div class="stat-label">Total Views</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h3>Quick Actions</h3>
|
||||||
|
<div class="quick-actions">
|
||||||
|
<button class="quick-btn" onclick="admin.showAddForm('apps')">
|
||||||
|
<span>→</span> Add New App
|
||||||
|
</button>
|
||||||
|
<button class="quick-btn" onclick="admin.showAddForm('articles')">
|
||||||
|
<span>→</span> Write Article
|
||||||
|
</button>
|
||||||
|
<button class="quick-btn" onclick="admin.showAddForm('sponsors')">
|
||||||
|
<span>→</span> Add Sponsor
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Apps Section -->
|
||||||
|
<section id="apps-section" class="content-section">
|
||||||
|
<div class="section-header">
|
||||||
|
<h2>Apps Management</h2>
|
||||||
|
<div class="header-actions">
|
||||||
|
<input type="text" id="apps-search" class="search-input" placeholder="Search apps...">
|
||||||
|
<select id="apps-filter" class="filter-select">
|
||||||
|
<option value="">All Categories</option>
|
||||||
|
</select>
|
||||||
|
<button class="add-btn" onclick="admin.showAddForm('apps')">
|
||||||
|
<span>→</span> Add App
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="data-table" id="apps-table">
|
||||||
|
<!-- Apps table will be populated here -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Articles Section -->
|
||||||
|
<section id="articles-section" class="content-section">
|
||||||
|
<div class="section-header">
|
||||||
|
<h2>Articles Management</h2>
|
||||||
|
<div class="header-actions">
|
||||||
|
<input type="text" id="articles-search" class="search-input" placeholder="Search articles...">
|
||||||
|
<button class="add-btn" onclick="admin.showAddForm('articles')">
|
||||||
|
<span>→</span> Add Article
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="data-table" id="articles-table">
|
||||||
|
<!-- Articles table will be populated here -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Categories Section -->
|
||||||
|
<section id="categories-section" class="content-section">
|
||||||
|
<div class="section-header">
|
||||||
|
<h2>Categories Management</h2>
|
||||||
|
<div class="header-actions">
|
||||||
|
<button class="add-btn" onclick="admin.showAddForm('categories')">
|
||||||
|
<span>→</span> Add Category
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="data-table" id="categories-table">
|
||||||
|
<!-- Categories table will be populated here -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Sponsors Section -->
|
||||||
|
<section id="sponsors-section" class="content-section">
|
||||||
|
<div class="section-header">
|
||||||
|
<h2>Sponsors Management</h2>
|
||||||
|
<div class="header-actions">
|
||||||
|
<button class="add-btn" onclick="admin.showAddForm('sponsors')">
|
||||||
|
<span>→</span> Add Sponsor
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="data-table" id="sponsors-table">
|
||||||
|
<!-- Sponsors table will be populated here -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Modal for Add/Edit Forms -->
|
||||||
|
<div id="form-modal" class="modal hidden">
|
||||||
|
<div class="modal-content large">
|
||||||
|
<div class="modal-header">
|
||||||
|
<h2 id="modal-title">Add/Edit</h2>
|
||||||
|
<button class="modal-close" onclick="admin.closeModal()">✕</button>
|
||||||
|
</div>
|
||||||
|
<div class="modal-body" id="modal-body">
|
||||||
|
<!-- Dynamic form content -->
|
||||||
|
</div>
|
||||||
|
<div class="modal-footer">
|
||||||
|
<button class="btn-cancel" onclick="admin.closeModal()">Cancel</button>
|
||||||
|
<button class="btn-save" id="save-btn">Save</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="admin.js?v=1759335000"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
683
docs/md_v2/marketplace/app-detail.css
Normal file
683
docs/md_v2/marketplace/app-detail.css
Normal file
@@ -0,0 +1,683 @@
|
|||||||
|
/* App Detail Page Styles */
|
||||||
|
|
||||||
|
.app-detail-container {
|
||||||
|
min-height: 100vh;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Back Button */
|
||||||
|
.header-nav {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.back-btn {
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: all 0.2s;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.back-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
background: rgba(80, 255, 255, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* App Hero Section */
|
||||||
|
.app-hero {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-hero-content {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 2fr;
|
||||||
|
gap: 3rem;
|
||||||
|
background: linear-gradient(135deg, #1a1a2e, #0f0f1e);
|
||||||
|
border: 2px solid var(--primary-cyan);
|
||||||
|
padding: 2rem;
|
||||||
|
box-shadow: 0 0 30px rgba(80, 255, 255, 0.15),
|
||||||
|
inset 0 0 20px rgba(80, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-hero-image {
|
||||||
|
width: 100%;
|
||||||
|
height: 300px;
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05));
|
||||||
|
background-size: cover;
|
||||||
|
background-position: center;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
font-size: 4rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badges {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badge {
|
||||||
|
padding: 0.3rem 0.6rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badge.featured {
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
color: var(--bg-dark);
|
||||||
|
box-shadow: 0 2px 10px rgba(80, 255, 255, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badge.sponsored {
|
||||||
|
background: linear-gradient(135deg, var(--warning), #ff8c00);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
box-shadow: 0 2px 10px rgba(245, 158, 11, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-hero-info h1 {
|
||||||
|
font-size: 2.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0.5rem 0;
|
||||||
|
text-shadow: 0 0 20px rgba(80, 255, 255, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-tagline {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stats */
|
||||||
|
.app-stats {
|
||||||
|
display: flex;
|
||||||
|
gap: 2rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
padding: 1rem 0;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-value {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-label {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Action Buttons */
|
||||||
|
.app-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn {
|
||||||
|
padding: 0.75rem 1.5rem;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
background: transparent;
|
||||||
|
color: var(--text-primary);
|
||||||
|
text-decoration: none;
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
cursor: pointer;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.primary {
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
color: var(--bg-dark);
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.primary:hover {
|
||||||
|
box-shadow: 0 4px 15px rgba(80, 255, 255, 0.3);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.secondary {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.secondary:hover {
|
||||||
|
background: rgba(243, 128, 245, 0.1);
|
||||||
|
box-shadow: 0 4px 15px rgba(243, 128, 245, 0.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.ghost {
|
||||||
|
border-color: var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.ghost:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pricing */
|
||||||
|
.pricing-info {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
font-size: 1.1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.pricing-label {
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.pricing-value {
|
||||||
|
color: var(--warning);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Navigation Tabs */
|
||||||
|
.tabs {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: row;
|
||||||
|
gap: 0;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
margin-bottom: 0;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-btn {
|
||||||
|
padding: 1rem 2rem;
|
||||||
|
background: transparent;
|
||||||
|
border: none;
|
||||||
|
border-bottom: 3px solid transparent;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.95rem;
|
||||||
|
margin-bottom: -2px;
|
||||||
|
white-space: nowrap;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-btn:hover {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
background: rgba(80, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-btn.active {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
border-bottom-color: var(--primary-cyan);
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-nav {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto 0;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab {
|
||||||
|
padding: 1rem 1.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: none;
|
||||||
|
border-bottom: 2px solid transparent;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
margin-bottom: -2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab:hover {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab.active {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
border-bottom-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Main Content Wrapper */
|
||||||
|
.app-main {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Content Sections */
|
||||||
|
.app-content {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-content {
|
||||||
|
display: none !important;
|
||||||
|
padding: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-content.active {
|
||||||
|
display: block !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Overview Layout */
|
||||||
|
.overview-columns {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 2fr 1fr;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.overview-main h2, .overview-main h3 {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin-top: 2rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.overview-main h2:first-child {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.overview-main h2 {
|
||||||
|
font-size: 1.8rem;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.overview-main h3 {
|
||||||
|
font-size: 1.3rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.features-list {
|
||||||
|
list-style: none;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.features-list li {
|
||||||
|
padding: 0.5rem 0;
|
||||||
|
padding-left: 1.5rem;
|
||||||
|
position: relative;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.features-list li:before {
|
||||||
|
content: "▸";
|
||||||
|
position: absolute;
|
||||||
|
left: 0;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.use-cases p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sidebar */
|
||||||
|
.sidebar {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-card {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-card h3 {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0 0 1rem 0;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stats-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 1fr;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stats-grid > div {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metadata {
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metadata div {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 0.75rem 0;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.metadata dt {
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
font-weight: normal;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metadata dd {
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin: 0;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-card p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Integration Content */
|
||||||
|
.integration-content {
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.integration-content h2 {
|
||||||
|
font-size: 1.8rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0 0 2rem 0;
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.integration-content h3 {
|
||||||
|
font-size: 1.3rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin: 2rem 0 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content {
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content h2 {
|
||||||
|
font-size: 1.8rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0 0 1.5rem 0;
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content h3 {
|
||||||
|
font-size: 1.3rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin: 2rem 0 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content h4 {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--accent-pink);
|
||||||
|
margin: 1.5rem 0 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
line-height: 1.6;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content code {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
padding: 0.2rem 0.4rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Code Blocks */
|
||||||
|
.code-block {
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
margin: 1rem 0;
|
||||||
|
overflow: hidden;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-lang {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
|
||||||
|
.copy-btn {
|
||||||
|
position: absolute;
|
||||||
|
top: 0.5rem;
|
||||||
|
right: 0.5rem;
|
||||||
|
padding: 0.4rem 0.8rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
z-index: 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
.copy-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-block pre {
|
||||||
|
margin: 0;
|
||||||
|
padding: 1rem;
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-block code {
|
||||||
|
background: transparent;
|
||||||
|
padding: 0;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Markdown rendered code blocks */
|
||||||
|
.integration-content pre,
|
||||||
|
.docs-content pre {
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
margin: 1rem 0;
|
||||||
|
padding: 1rem;
|
||||||
|
padding-top: 2.5rem; /* Space for copy button */
|
||||||
|
overflow-x: auto;
|
||||||
|
position: relative;
|
||||||
|
max-height: none; /* Remove any height restrictions */
|
||||||
|
height: auto; /* Allow content to expand */
|
||||||
|
}
|
||||||
|
|
||||||
|
.integration-content pre code,
|
||||||
|
.docs-content pre code {
|
||||||
|
background: transparent;
|
||||||
|
padding: 0;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
white-space: pre; /* Preserve whitespace and line breaks */
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Feature Grid */
|
||||||
|
.feature-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-card {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1.5rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-card:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
background: rgba(80, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-card h4 {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Info Box */
|
||||||
|
.info-box {
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.05), rgba(243, 128, 245, 0.03));
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
border-left: 4px solid var(--primary-cyan);
|
||||||
|
padding: 1.5rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-box h4 {
|
||||||
|
margin-top: 0;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Support Grid */
|
||||||
|
.support-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.support-card {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1.5rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.support-card h3 {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Related Apps */
|
||||||
|
.related-apps {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 4rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-apps h2 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-app-card {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-app-card:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive */
|
||||||
|
@media (max-width: 1024px) {
|
||||||
|
.app-hero-content {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-stats {
|
||||||
|
justify-content: space-around;
|
||||||
|
}
|
||||||
|
|
||||||
|
.overview-columns {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.app-hero-info h1 {
|
||||||
|
font-size: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-actions {
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tabs {
|
||||||
|
overflow-x: auto;
|
||||||
|
-webkit-overflow-scrolling: touch;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-btn {
|
||||||
|
padding: 0.75rem 1.5rem;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-nav {
|
||||||
|
overflow-x: auto;
|
||||||
|
gap: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab {
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-grid,
|
||||||
|
.support-grid {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-content {
|
||||||
|
padding: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-main {
|
||||||
|
padding: 0 1rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
175
docs/md_v2/marketplace/app-detail.html
Normal file
175
docs/md_v2/marketplace/app-detail.html
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en" data-theme="dark">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>App Details - Crawl4AI Marketplace</title>
|
||||||
|
<link rel="stylesheet" href="marketplace.css">
|
||||||
|
<link rel="stylesheet" href="app-detail.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="app-detail-container">
|
||||||
|
<!-- Header -->
|
||||||
|
<header class="marketplace-header">
|
||||||
|
<div class="header-content">
|
||||||
|
<div class="header-left">
|
||||||
|
<div class="logo-title">
|
||||||
|
<img src="../assets/images/logo.png" alt="Crawl4AI" class="header-logo">
|
||||||
|
<h1>
|
||||||
|
<span class="ascii-border">[</span>
|
||||||
|
Marketplace
|
||||||
|
<span class="ascii-border">]</span>
|
||||||
|
</h1>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="header-nav">
|
||||||
|
<a href="index.html" class="back-btn">← Back to Marketplace</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<!-- App Hero Section -->
|
||||||
|
<section class="app-hero">
|
||||||
|
<div class="app-hero-content">
|
||||||
|
<div class="app-hero-image" id="app-image">
|
||||||
|
<!-- Dynamic image -->
|
||||||
|
</div>
|
||||||
|
<div class="app-hero-info">
|
||||||
|
<div class="app-badges">
|
||||||
|
<span class="app-badge" id="app-type">Open Source</span>
|
||||||
|
<span class="app-badge featured" id="app-featured" style="display:none">FEATURED</span>
|
||||||
|
<span class="app-badge sponsored" id="app-sponsored" style="display:none">SPONSORED</span>
|
||||||
|
</div>
|
||||||
|
<h1 id="app-name">App Name</h1>
|
||||||
|
<p id="app-description" class="app-tagline">App description goes here</p>
|
||||||
|
|
||||||
|
<div class="app-stats">
|
||||||
|
<div class="stat">
|
||||||
|
<span class="stat-value" id="app-rating">★★★★★</span>
|
||||||
|
<span class="stat-label">Rating</span>
|
||||||
|
</div>
|
||||||
|
<div class="stat">
|
||||||
|
<span class="stat-value" id="app-downloads">0</span>
|
||||||
|
<span class="stat-label">Downloads</span>
|
||||||
|
</div>
|
||||||
|
<div class="stat">
|
||||||
|
<span class="stat-value" id="app-category">Category</span>
|
||||||
|
<span class="stat-label">Category</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="app-actions">
    <!-- rel="noopener noreferrer": pages opened in a new tab must not receive a window.opener handle back to the marketplace. -->
    <a href="#" id="app-website" class="action-btn primary" target="_blank" rel="noopener noreferrer">Visit Website</a>
    <a href="#" id="app-github" class="action-btn" target="_blank" rel="noopener noreferrer">View GitHub</a>
    <a href="#" id="app-demo" class="action-btn" target="_blank" rel="noopener noreferrer" style="display:none">Live Demo</a>
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- App Details Section -->
|
||||||
|
<main class="app-main">
|
||||||
|
<div class="app-content">
|
||||||
|
<div class="tabs">
|
||||||
|
<button class="tab-btn active" data-tab="overview">Overview</button>
|
||||||
|
<button class="tab-btn" data-tab="integration">Integration</button>
|
||||||
|
<!-- <button class="tab-btn" data-tab="docs">Documentation</button>
|
||||||
|
<button class="tab-btn" data-tab="support">Support</button> -->
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<section id="overview-tab" class="tab-content active">
|
||||||
|
<div class="overview-columns">
|
||||||
|
<div class="overview-main">
|
||||||
|
<div id="app-overview">Overview content goes here.</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<aside class="sidebar">
|
||||||
|
<div class="sidebar-card">
|
||||||
|
<h3>Download Stats</h3>
|
||||||
|
<div class="stats-grid">
|
||||||
|
<div>
|
||||||
|
<span class="stat-value" id="sidebar-downloads">0</span>
|
||||||
|
<span class="stat-label">Downloads</span>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<span class="stat-value" id="sidebar-rating">0.0</span>
|
||||||
|
<span class="stat-label">Rating</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="sidebar-card">
|
||||||
|
<h3>App Metadata</h3>
|
||||||
|
<dl class="metadata">
|
||||||
|
<div>
|
||||||
|
<dt>Category</dt>
|
||||||
|
<dd id="sidebar-category">-</dd>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<dt>Type</dt>
|
||||||
|
<dd id="sidebar-type">-</dd>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<dt>Status</dt>
|
||||||
|
<dd id="sidebar-status">Active</dd>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<dt>Pricing</dt>
|
||||||
|
<dd id="sidebar-pricing">-</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="sidebar-card">
|
||||||
|
<h3>Contact</h3>
|
||||||
|
<p id="sidebar-contact">contact@example.com</p>
|
||||||
|
</div>
|
||||||
|
</aside>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section id="integration-tab" class="tab-content">
|
||||||
|
<div class="integration-content" id="app-integration">
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- <section id="docs-tab" class="tab-content">
|
||||||
|
<div class="docs-content" id="app-docs">
|
||||||
|
</div>
|
||||||
|
</section> -->
|
||||||
|
|
||||||
|
<!-- <section id="support-tab" class="tab-content">
|
||||||
|
<div class="docs-content">
|
||||||
|
<h2>Support</h2>
|
||||||
|
<div class="support-grid">
|
||||||
|
<div class="support-card">
|
||||||
|
<h3>📧 Contact</h3>
|
||||||
|
<p id="app-contact">contact@example.com</p>
|
||||||
|
</div>
|
||||||
|
<div class="support-card">
|
||||||
|
<h3>🐛 Report Issues</h3>
|
||||||
|
<p>Found a bug? Report it on GitHub Issues.</p>
|
||||||
|
</div>
|
||||||
|
<div class="support-card">
|
||||||
|
<h3>💬 Community</h3>
|
||||||
|
<p>Join our Discord for help and discussions.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section> -->
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<!-- Related Apps -->
|
||||||
|
<section class="related-apps">
|
||||||
|
<h2>Related Apps</h2>
|
||||||
|
<div id="related-apps-grid" class="related-grid">
|
||||||
|
<!-- Dynamic related apps -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="app-detail.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
318
docs/md_v2/marketplace/app-detail.js
Normal file
318
docs/md_v2/marketplace/app-detail.js
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
// App Detail Page JavaScript
|
||||||
|
// Work out where the marketplace API is reachable from this page.
// When the page is served from a local host on an explicit port other than
// 8100, requests are sent to the backend at 127.0.0.1:8100 (same protocol);
// in every other case same-origin relative URLs are used.
const { API_BASE, API_ORIGIN } = (function resolveApiEndpoints() {
    const LOCAL_HOSTNAMES = ['localhost', '127.0.0.1', '0.0.0.0'];
    const loc = window.location;

    const servedLocally = LOCAL_HOSTNAMES.indexOf(loc.hostname) !== -1;
    const onOtherPort = Boolean(loc.port) && loc.port !== '8100';
    const apiOrigin = servedLocally && onOtherPort
        ? `${loc.protocol}//127.0.0.1:8100`
        : '';

    return { API_BASE: `${apiOrigin}/marketplace/api`, API_ORIGIN: apiOrigin };
})();
|
||||||
|
|
||||||
|
/**
 * Controller for the marketplace app-detail page.
 *
 * Reads the app slug from the `?app=` query parameter, fetches the matching
 * record from the marketplace API, fills the page's DOM placeholders, wires up
 * the tab buttons and finally renders a short list of related apps.
 */
class AppDetailPage {
    constructor() {
        this.appSlug = this.getAppSlugFromURL();
        this.appData = null;
        // A constructor cannot await, so loading is kicked off asynchronously.
        this.init();
    }

    /**
     * @returns {string} Value of the `app` query parameter, or '' when absent.
     */
    getAppSlugFromURL() {
        const params = new URLSearchParams(window.location.search);
        return params.get('app') || '';
    }

    /**
     * Entry point: redirect to the index when no slug is given, otherwise load
     * the app, wire the tabs and load related apps.
     */
    async init() {
        if (!this.appSlug) {
            window.location.href = 'index.html';
            return;
        }

        await this.loadAppDetails();
        // loadAppDetails() redirects to the index when nothing was found; there
        // is nothing to wire up or to relate other apps to in that case.
        if (!this.appData) return;

        this.setupEventListeners();
        await this.loadRelatedApps();
    }

    /**
     * Fetch the app by slug and render it. If that fails for any reason, fall
     * back to fetching the full app list and matching by slug or by a slug
     * derived from the app name; redirect to the index when that fails too.
     */
    async loadAppDetails() {
        try {
            // Encode the slug: it comes straight from the query string.
            const response = await fetch(`${API_BASE}/apps/${encodeURIComponent(this.appSlug)}`);
            if (!response.ok) throw new Error('App not found');

            this.appData = await response.json();
            this.renderAppDetails();
        } catch (error) {
            console.error('Error loading app details:', error);
            // Fallback to loading all apps and finding the right one
            try {
                const response = await fetch(`${API_BASE}/apps`);
                if (!response.ok) {
                    throw new Error(`App list request failed (${response.status})`);
                }
                const apps = await response.json();
                const match = (Array.isArray(apps) ? apps : []).find(app =>
                    app.slug === this.appSlug || this._nameToSlug(app.name) === this.appSlug);
                this.appData = match || null;

                if (this.appData) {
                    this.renderAppDetails();
                } else {
                    window.location.href = 'index.html';
                }
            } catch (err) {
                console.error('Error loading apps:', err);
                this.appData = null;
                window.location.href = 'index.html';
            }
        }
    }

    /**
     * Copy the loaded app record into the page: title, hero image, badges,
     * stats, action buttons, sidebar and tab bodies. Elements that are missing
     * from the page are skipped rather than aborting the render.
     */
    renderAppDetails() {
        if (!this.appData) return;
        const app = this.appData;

        // Update title
        document.title = `${app.name} - Crawl4AI Marketplace`;

        // Hero image
        const appImage = document.getElementById('app-image');
        if (appImage) {
            if (app.image) {
                // Escape quote/backslash so the URL stays inside the CSS string.
                const cssUrl = String(app.image).replace(/["\\]/g, '\\$&');
                appImage.style.backgroundImage = `url("${cssUrl}")`;
                appImage.textContent = '';
            } else {
                // textContent, not innerHTML: the category is plain text.
                appImage.textContent = `[${app.category || 'APP'}]`;
            }
        }

        // Basic info
        this._setText('app-name', app.name);
        this._setText('app-description', app.description);
        this._setText('app-type', app.type || 'Open Source');
        this._setText('app-category', app.category);

        // Badges
        for (const flag of ['featured', 'sponsored']) {
            const badge = document.getElementById(`app-${flag}`);
            if (badge && app[flag]) badge.style.display = 'inline-block';
        }

        // Stats. The star count is clamped to 0..5 because String.repeat()
        // throws a RangeError on a negative count (rating above 5).
        const rating = Number(app.rating) || 0;
        const filled = Math.min(5, Math.max(0, Math.floor(rating)));
        const stars = '★'.repeat(filled) + '☆'.repeat(5 - filled);
        this._setText('app-rating', `${stars} ${rating}/5`);
        this._setText('app-downloads', this.formatNumber(app.downloads || 0));

        // Action buttons
        this._setLink('app-website', app.website_url);
        this._setLink('app-github', app.github_url);

        // Contact (this element only exists when the support tab is enabled)
        this._setText('app-contact', app.contact_email || 'Not available');

        // Sidebar info
        this._setText('sidebar-downloads', this.formatNumber(app.downloads || 0));
        this._setText('sidebar-rating', rating.toFixed(1));
        this._setText('sidebar-category', app.category || '-');
        this._setText('sidebar-type', app.type || '-');
        this._setText('sidebar-status', app.status || 'Active');
        this._setText('sidebar-pricing', app.pricing || 'Free');
        this._setText('sidebar-contact', app.contact_email || 'contact@example.com');

        // Render tab contents from database fields
        this.renderTabContents();
    }

    /**
     * Fill the overview, integration and documentation tabs from the
     * `long_description`, `integration_guide` and `documentation` fields.
     */
    renderTabContents() {
        // Overview tab - use long_description from database
        const overviewDiv = document.getElementById('app-overview');
        if (overviewDiv) {
            if (this.appData.long_description) {
                overviewDiv.innerHTML = this.renderMarkdown(this.appData.long_description);
            } else {
                // The short description is plain text, so escape it for innerHTML.
                const summary = this.appData.description || 'No overview available.';
                overviewDiv.innerHTML = `<p>${this.escapeHtml(summary)}</p>`;
            }
        }

        // Integration tab - use integration_guide field from database
        const integrationDiv = document.getElementById('app-integration');
        if (integrationDiv) {
            if (this.appData.integration_guide) {
                integrationDiv.innerHTML = this.renderMarkdown(this.appData.integration_guide);
                this.addCopyButtonsToCodeBlocks(integrationDiv);
            } else {
                integrationDiv.innerHTML = '<p>Integration guide not yet available. Please check the official website for details.</p>';
            }
        }

        // Documentation tab - use documentation field from database
        const docsDiv = document.getElementById('app-docs');
        if (docsDiv) {
            if (this.appData.documentation) {
                docsDiv.innerHTML = this.renderMarkdown(this.appData.documentation);
                this.addCopyButtonsToCodeBlocks(docsDiv);
            } else {
                docsDiv.innerHTML = '<p>Documentation coming soon.</p>';
            }
        }
    }

    /**
     * Add a "Copy" button to every `<pre><code>` block inside `container`.
     * Blocks that already have a button are left alone.
     *
     * @param {Element} container Element whose code blocks get a button.
     */
    addCopyButtonsToCodeBlocks(container) {
        container.querySelectorAll('pre code').forEach(codeBlock => {
            const pre = codeBlock.parentElement;

            // Skip if already has a copy button
            if (pre.querySelector('.copy-btn')) return;

            const copyBtn = document.createElement('button');
            copyBtn.type = 'button';
            copyBtn.className = 'copy-btn';
            copyBtn.textContent = 'Copy';

            const flash = (label) => {
                copyBtn.textContent = label;
                setTimeout(() => {
                    copyBtn.textContent = 'Copy';
                }, 2000);
            };

            copyBtn.onclick = () => {
                // navigator.clipboard is unavailable outside secure contexts.
                if (!navigator.clipboard) {
                    flash('Copy failed');
                    return;
                }
                navigator.clipboard.writeText(codeBlock.textContent)
                    .then(() => flash('✓ Copied!'))
                    .catch((error) => {
                        console.error('Error copying code:', error);
                        flash('Copy failed');
                    });
            };

            // Add button to pre element
            pre.style.position = 'relative';
            pre.insertBefore(copyBtn, codeBlock);
        });
    }

    /**
     * Convert a small Markdown subset to HTML: fenced code blocks, inline
     * code, `#` headings, `*`/`-` bullet lists, bold, italic, links and
     * paragraphs (single newlines inside a paragraph become `<br>`).
     *
     * Code is HTML-escaped. Other text is passed through as-is.
     * NOTE(review): raw HTML outside code is therefore not escaped; this
     * assumes the marketplace content is authored by trusted admins - confirm.
     *
     * @param {string} text Markdown source.
     * @returns {string} HTML string ('' for empty input).
     */
    renderMarkdown(text) {
        if (!text) return '';

        // Code is swapped for placeholders first so that the inline rules
        // (bold, italic, links) never touch its contents.
        const codeBlocks = [];
        const inlineCodes = [];

        const processed = String(text)
            .replace(/\r\n?/g, '\n')
            .replace(/```(\w+)?\n([\s\S]*?)```/g, (match, lang, code) => {
                codeBlocks.push(`<pre><code class="language-${lang || ''}">${this.escapeHtml(code)}</code></pre>`);
                // Blank lines make the code block a paragraph of its own.
                return `\n\n___CODE_BLOCK_${codeBlocks.length - 1}___\n\n`;
            })
            .replace(/`([^`]+)`/g, (match, code) => {
                inlineCodes.push(`<code>${this.escapeHtml(code)}</code>`);
                return `___INLINE_CODE_${inlineCodes.length - 1}___`;
            });

        const html = processed
            .split(/\n{2,}/)
            .map(chunk => chunk.trim())
            .filter(chunk => chunk !== '')
            .map(chunk => this._renderMarkdownBlock(chunk))
            .join('');

        // Restore with a replacer function: a plain replacement string would
        // interpret `$&`, `$$`, `$'` etc. occurring inside the code.
        return html.replace(/___(INLINE_CODE|CODE_BLOCK)_(\d+)___/g, (match, kind, index) => {
            const store = kind === 'CODE_BLOCK' ? codeBlocks : inlineCodes;
            return store[Number(index)] || match;
        });
    }

    /**
     * Render one blank-line-delimited Markdown chunk (headings, bullet lists,
     * code-block placeholders and paragraph text) to HTML.
     *
     * @param {string} chunk Non-empty chunk without blank lines.
     * @returns {string} HTML for the chunk.
     */
    _renderMarkdownBlock(chunk) {
        const out = [];
        let paragraph = [];
        let items = [];

        const flushParagraph = () => {
            if (paragraph.length) {
                out.push(`<p>${paragraph.join('<br>')}</p>`);
                paragraph = [];
            }
        };
        const flushList = () => {
            if (items.length) {
                out.push(`<ul>${items.map(item => `<li>${item}</li>`).join('')}</ul>`);
                items = [];
            }
        };

        for (const line of chunk.split('\n')) {
            const heading = /^(#{1,6}) (.*)$/.exec(line);
            const bullet = /^\s*[*-] (.*)$/.exec(line);

            if (/^___CODE_BLOCK_\d+___$/.test(line.trim())) {
                flushParagraph();
                flushList();
                out.push(line.trim());
            } else if (heading) {
                flushParagraph();
                flushList();
                const level = heading[1].length;
                out.push(`<h${level}>${this._renderInlineMarkdown(heading[2])}</h${level}>`);
            } else if (bullet) {
                flushParagraph();
                items.push(this._renderInlineMarkdown(bullet[1]));
            } else {
                flushList();
                paragraph.push(this._renderInlineMarkdown(line));
            }
        }
        flushParagraph();
        flushList();

        return out.join('');
    }

    /**
     * Apply the inline Markdown rules (bold, italic, links) to one line.
     * Links whose URL has a scheme other than http, https or mailto are
     * reduced to their label.
     *
     * @param {string} line Single line of text.
     * @returns {string} Line with inline markup converted to HTML.
     */
    _renderInlineMarkdown(line) {
        return line
            .replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
            .replace(/\*(.+?)\*/g, '<em>$1</em>')
            .replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, label, url) => {
                const href = this._safeUrl(url);
                if (!href) return label;
                // rel=noopener: the opened page must not get window.opener.
                return `<a href="${this.escapeHtml(href)}" target="_blank" rel="noopener noreferrer">${label}</a>`;
            });
    }

    /**
     * Escape text for safe insertion into HTML element or attribute content.
     *
     * @param {*} text Value to escape; null/undefined become ''.
     * @returns {string} Escaped string.
     */
    escapeHtml(text) {
        const value = text === null || text === undefined ? '' : String(text);
        return value
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Format a count compactly: 1500 -> "1.5K", 2500000 -> "2.5M".
     *
     * @param {number|string} num Count to format; non-numeric values count as 0.
     * @returns {string} Formatted count.
     */
    formatNumber(num) {
        const value = Number(num) || 0;
        if (value >= 1000000) {
            return (value / 1000000).toFixed(1) + 'M';
        } else if (value >= 1000) {
            return (value / 1000).toFixed(1) + 'K';
        }
        return value.toString();
    }

    /**
     * Wire the `.tab-btn` buttons: clicking one marks it active and shows the
     * `.tab-content` element whose id is `<data-tab>-tab`.
     */
    setupEventListeners() {
        const tabs = document.querySelectorAll('.tab-btn');

        tabs.forEach(tab => {
            tab.addEventListener('click', () => {
                // Update active tab button
                tabs.forEach(t => t.classList.remove('active'));
                tab.classList.add('active');

                // Hide all tab contents
                document.querySelectorAll('.tab-content').forEach(content => {
                    content.classList.remove('active');
                });

                // Show the selected tab content
                const targetTab = document.getElementById(`${tab.dataset.tab}-tab`);
                if (targetTab) {
                    targetTab.classList.add('active');
                }
            });
        });
    }

    /**
     * Load up to three other apps from the current app's category and render
     * them as clickable cards. Failures are logged and leave the grid as is.
     */
    async loadRelatedApps() {
        const grid = document.getElementById('related-apps-grid');
        if (!grid || !this.appData) return;

        try {
            const category = encodeURIComponent(this.appData.category || '');
            const response = await fetch(`${API_BASE}/apps?category=${category}&limit=4`);
            if (!response.ok) {
                throw new Error(`Related apps request failed (${response.status})`);
            }
            const apps = await response.json();

            // One extra app is requested because the current one may be in the list.
            const relatedApps = (Array.isArray(apps) ? apps : [])
                .filter(app => app.slug !== this.appSlug)
                .slice(0, 3);

            // All API values are escaped; the slug travels in a data attribute
            // instead of being interpolated into an inline onclick handler.
            grid.innerHTML = relatedApps.map(app => {
                const slug = app.slug || this._nameToSlug(app.name);
                const description = String(app.description || '');
                const excerpt = description.length > 100
                    ? `${description.substring(0, 100)}...`
                    : description;
                const rating = app.rating === null || app.rating === undefined ? 0 : app.rating;
                return `
                <div class="related-app-card" data-slug="${this.escapeHtml(slug)}">
                    <h4>${this.escapeHtml(app.name)}</h4>
                    <p>${this.escapeHtml(excerpt)}</p>
                    <div style="display: flex; justify-content: space-between; margin-top: 0.5rem; font-size: 0.75rem;">
                        <span style="color: var(--primary-cyan)">${this.escapeHtml(app.type)}</span>
                        <span style="color: var(--warning)">★ ${this.escapeHtml(rating)}/5</span>
                    </div>
                </div>
                `;
            }).join('');

            grid.querySelectorAll('.related-app-card').forEach(card => {
                card.addEventListener('click', () => {
                    window.location.href = `app-detail.html?app=${encodeURIComponent(card.dataset.slug)}`;
                });
            });
        } catch (error) {
            console.error('Error loading related apps:', error);
        }
    }

    /**
     * Derive a slug from an app name: lower-case, whitespace runs -> '-'.
     *
     * @param {string} name App name (may be missing).
     * @returns {string} Derived slug, '' when there is no name.
     */
    _nameToSlug(name) {
        return String(name || '').toLowerCase().replace(/\s+/g, '-');
    }

    /**
     * Set the text of the element with the given id, if it exists.
     *
     * @param {string} id Element id.
     * @param {*} value Text to show; null/undefined clear the element.
     */
    _setText(id, value) {
        const element = document.getElementById(id);
        if (element) {
            element.textContent = value === null || value === undefined ? '' : value;
        }
    }

    /**
     * Point the link with the given id at `url`, or hide it when the URL is
     * missing or uses a disallowed scheme.
     *
     * @param {string} id Element id of the anchor.
     * @param {string} url Target URL from the app record.
     */
    _setLink(id, url) {
        const link = document.getElementById(id);
        if (!link) return;

        const href = this._safeUrl(url);
        if (href) {
            link.href = href;
        } else {
            link.style.display = 'none';
        }
    }

    /**
     * Return `url` (trimmed) when it is relative or uses http, https or
     * mailto; return '' for anything else, e.g. `javascript:` URLs.
     *
     * @param {string} url Candidate URL.
     * @returns {string} The URL, or '' when it must not be used as a link.
     */
    _safeUrl(url) {
        if (!url) return '';
        const raw = String(url).trim();
        // Browsers ignore ASCII whitespace and control characters inside the
        // scheme, so drop them before inspecting it.
        const compact = raw.replace(/[\u0000-\u0020]/g, '');
        const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
        if (scheme && !['http', 'https', 'mailto'].includes(scheme[1].toLowerCase())) {
            return '';
        }
        return raw;
    }
}
|
||||||
|
|
||||||
|
// Initialize when DOM is loaded
|
||||||
|
// Build the page controller once the document has been parsed; its
// constructor starts loading the app on its own.
document.addEventListener('DOMContentLoaded', function () {
    new AppDetailPage();
});
|
||||||
14
docs/md_v2/marketplace/backend/.env.example
Normal file
14
docs/md_v2/marketplace/backend/.env.example
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# Marketplace Configuration
|
||||||
|
# Copy this to .env and update with your values
|
||||||
|
|
||||||
|
# Admin password (required)
|
||||||
|
MARKETPLACE_ADMIN_PASSWORD=change_this_password
|
||||||
|
|
||||||
|
# JWT secret key (required) - generate with: python3 -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||||
|
MARKETPLACE_JWT_SECRET=change_this_to_a_secure_random_key
|
||||||
|
|
||||||
|
# Database path (optional, defaults to ./marketplace.db)
|
||||||
|
MARKETPLACE_DB_PATH=./marketplace.db
|
||||||
|
|
||||||
|
# Token expiry in hours (optional, defaults to 4)
|
||||||
|
MARKETPLACE_TOKEN_EXPIRY=4
|
||||||
59
docs/md_v2/marketplace/backend/config.py
Normal file
59
docs/md_v2/marketplace/backend/config.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
"""
|
||||||
|
Marketplace Configuration - Loads from .env file
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Load .env file
|
||||||
|
# The .env file is expected next to this module; without it the process
# prints setup instructions and exits with status 1.
_backend_dir = Path(__file__).parent
env_path = _backend_dir / '.env'
if not env_path.exists():
    for _line in (
        "\n❌ ERROR: No .env file found!",
        "Please copy .env.example to .env and update with your values:",
        f" cp {_backend_dir}/.env.example {_backend_dir}/.env",
        "\nThen edit .env with your secure values.",
    ):
        print(_line)
    sys.exit(1)

load_dotenv(env_path)

# Required environment variables: an unset or empty value counts as missing.
required_vars = ['MARKETPLACE_ADMIN_PASSWORD', 'MARKETPLACE_JWT_SECRET']
missing_vars = [name for name in required_vars if not os.getenv(name)]

if missing_vars:
    _missing = ', '.join(missing_vars)
    print(f"\n❌ ERROR: Missing required environment variables: {_missing}")
    print("Please check your .env file and ensure all required variables are set.")
    sys.exit(1)
|
||||||
|
|
||||||
|
class Config:
    """Marketplace settings, read from environment variables at import time."""

    # SHA-256 hex digest of the admin password taken from the environment.
    # NOTE(review): this is a single unsalted SHA-256 pass; a dedicated
    # password hash would be stronger, but whatever verifies logins against
    # this value would have to change with it - confirm before touching.
    ADMIN_PASSWORD_HASH = hashlib.sha256(
        os.environ.get('MARKETPLACE_ADMIN_PASSWORD').encode()
    ).hexdigest()

    # Secret used for JWT token generation.
    JWT_SECRET_KEY = os.environ.get('MARKETPLACE_JWT_SECRET')

    # SQLite database file; defaults to ./marketplace.db.
    DATABASE_PATH = os.environ.get('MARKETPLACE_DB_PATH', './marketplace.db')

    # Token lifetime in hours; defaults to 4. A non-integer value raises
    # ValueError while this class body is executed.
    TOKEN_EXPIRY_HOURS = int(os.environ.get('MARKETPLACE_TOKEN_EXPIRY', '4'))

    # CORS origins - hardcoded as they don't contain secrets.
    # Local development hosts on ports 8000/8080/8100 come first, followed by
    # the production sites.
    ALLOWED_ORIGINS = [
        f"http://{host}:{port}"
        for host in ("localhost", "127.0.0.1")
        for port in (8000, 8080, 8100)
    ] + [
        "https://crawl4ai.com",
        "https://www.crawl4ai.com",
        "https://docs.crawl4ai.com",
        "https://market.crawl4ai.com",
    ]
|
||||||
117
docs/md_v2/marketplace/backend/database.py
Normal file
117
docs/md_v2/marketplace/backend/database.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
import sqlite3
|
||||||
|
import yaml
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Any
|
||||||
|
|
||||||
|
class DatabaseManager:
    """Small SQLite helper whose tables are created and migrated from a YAML schema.

    The schema is read as a mapping shaped like::

        database:
          name: <sqlite file used when no db_path is given>
        tables:
          <table name>:
            columns:
              <column name>:
                type: <SQL type>
                primary / autoincrement / unique / required: <bool, optional>
                default: <value, optional>
    """

    def __init__(self, db_path=None, schema_path='schema.yaml'):
        """Load the schema, open the database and create/migrate its tables.

        Args:
            db_path: SQLite file to open; falls back to the schema's
                ``database.name`` when empty.
            schema_path: Path of the YAML schema file.
        """
        self.schema = self._load_schema(schema_path)
        # Use provided path or fallback to schema default
        self.db_path = db_path or self.schema['database']['name']
        self.conn = None
        self._init_database()

    def _load_schema(self, path: str) -> Dict:
        """Parse the YAML schema file at ``path`` and return it as a dict."""
        with open(path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)

    def _init_database(self):
        """Auto-create/migrate database from schema"""
        # check_same_thread=False lets the connection be used from threads
        # other than the one that created it.
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row

        for table_name, table_def in self.schema['tables'].items():
            self._create_or_update_table(table_name, table_def['columns'])

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier."""
        return '"' + str(name).replace('"', '""') + '"'

    @staticmethod
    def _default_clause(col_spec: Dict) -> str:
        """Return the `` DEFAULT ...`` fragment for a column spec ('' if none)."""
        if 'default' not in col_spec:
            return ''

        default = col_spec['default']
        if default is None:
            literal = 'NULL'
        elif isinstance(default, bool):
            # bool must be tested before the generic branch: str(True) is
            # "True", which is not a portable SQL literal.
            literal = '1' if default else '0'
        elif default == 'CURRENT_TIMESTAMP':
            literal = default
        elif isinstance(default, str):
            # Double embedded quotes so the value stays one SQL string literal.
            literal = "'" + default.replace("'", "''") + "'"
        else:
            literal = str(default)
        return f" DEFAULT {literal}"

    def _column_definition(self, col_name: str, col_spec: Dict) -> str:
        """Build the full column definition used in CREATE TABLE."""
        col_def = f"{self._quote_identifier(col_name)} {col_spec['type']}"
        if col_spec.get('primary'):
            col_def += " PRIMARY KEY"
        if col_spec.get('autoincrement'):
            col_def += " AUTOINCREMENT"
        if col_spec.get('unique'):
            col_def += " UNIQUE"
        if col_spec.get('required'):
            col_def += " NOT NULL"
        return col_def + self._default_clause(col_spec)

    def _create_or_update_table(self, table_name: str, columns: Dict):
        """Create ``table_name`` if it is missing, else add any new columns.

        Existing columns are never altered or dropped. Columns added to an
        existing table only get their type and default; the primary, unique
        and required flags are applied at table creation only.
        """
        cursor = self.conn.cursor()
        quoted_table = self._quote_identifier(table_name)

        # Check if table exists
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
            (table_name,),
        )
        table_exists = cursor.fetchone() is not None

        if not table_exists:
            col_defs = [
                self._column_definition(col_name, col_spec)
                for col_name, col_spec in columns.items()
            ]
            cursor.execute(f"CREATE TABLE {quoted_table} ({', '.join(col_defs)})")
        else:
            # Check for new columns and add them
            cursor.execute(f"PRAGMA table_info({quoted_table})")
            existing_columns = {row[1] for row in cursor.fetchall()}

            for col_name, col_spec in columns.items():
                if col_name in existing_columns:
                    continue
                col_def = f"{col_spec['type']}{self._default_clause(col_spec)}"
                cursor.execute(
                    f"ALTER TABLE {quoted_table} "
                    f"ADD COLUMN {self._quote_identifier(col_name)} {col_def}"
                )

        self.conn.commit()

    def get_all(self, table: str, limit: int = 100, offset: int = 0,
                where: str = None, params=()) -> List[Dict]:
        """Return up to ``limit`` rows of ``table`` as dicts, skipping ``offset``.

        Args:
            table: Table name; it is quoted as an identifier.
            limit: Maximum number of rows; must be convertible to int.
            offset: Number of rows to skip; must be convertible to int.
            where: Optional SQL condition inserted verbatim after ``WHERE``.
                Never build it from untrusted text - use ``?`` placeholders
                and pass the values through ``params``.
            params: Values bound to the ``?`` placeholders in ``where``.

        Raises:
            ValueError: If ``limit`` or ``offset`` is not an integer value.
        """
        query = f"SELECT * FROM {self._quote_identifier(table)}"
        if where:
            query += f" WHERE {where}"
        # Bind the paging values instead of formatting them into the SQL text.
        query += " LIMIT ? OFFSET ?"

        cursor = self.conn.cursor()
        cursor.execute(query, (*params, int(limit), int(offset)))
        return [dict(row) for row in cursor.fetchall()]

    def search(self, query: str, tables: List[str] = None) -> Dict[str, List[Dict]]:
        """Substring-search ``query`` in the TEXT columns of the given tables.

        Args:
            query: Text to look for; matched with ``LIKE '%query%'``.
            tables: Tables to search; defaults to every table in the schema.

        Returns:
            Mapping of table name to at most 10 matching rows; tables without
            matches are omitted.

        Raises:
            KeyError: If a requested table is not part of the schema.
        """
        if not tables:
            tables = list(self.schema['tables'].keys())

        results = {}
        cursor = self.conn.cursor()

        for table in tables:
            # Search in text columns
            columns = self.schema['tables'][table]['columns']
            text_cols = [col for col, spec in columns.items()
                         if spec['type'] == 'TEXT' and col != 'id']
            if not text_cols:
                continue

            where_clause = ' OR '.join(
                f"{self._quote_identifier(col)} LIKE ?" for col in text_cols
            )
            like_params = [f'%{query}%'] * len(text_cols)

            cursor.execute(
                f"SELECT * FROM {self._quote_identifier(table)} "
                f"WHERE {where_clause} LIMIT 10",
                like_params,
            )
            rows = cursor.fetchall()
            if rows:
                results[table] = [dict(row) for row in rows]

        return results

    def close(self):
        """Close the database connection if one is open."""
        if self.conn:
            self.conn.close()
            self.conn = None
|
||||||
267
docs/md_v2/marketplace/backend/dummy_data.py
Normal file
267
docs/md_v2/marketplace/backend/dummy_data.py
Normal file
@@ -0,0 +1,267 @@
|
|||||||
|
import sqlite3
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from database import DatabaseManager
|
||||||
|
|
||||||
|
def generate_slug(text):
    """Return a URL-friendly slug for ``text``.

    The text is lower-cased, ``&`` becomes the word "and", apostrophes are
    dropped, and every other run of non-alphanumeric characters collapses to
    a single hyphen. Leading and trailing hyphens are removed.

    >>> generate_slug("Browser Automation")
    'browser-automation'
    >>> generate_slug("What's New: Tips & Tricks")
    'whats-new-tips-and-tricks'
    """
    import re  # local import keeps this helper independent of the module header

    slug = text.lower().replace('&', ' and ')
    slug = re.sub(r"['\u2019]", '', slug)
    # [\W_] is "anything that is not a letter or digit"; titles with colons,
    # hyphens or repeated spaces therefore yield exactly one separator.
    slug = re.sub(r'[\W_]+', '-', slug)
    return slug.strip('-')
|
||||||
|
|
||||||
|
def generate_dummy_data():
|
||||||
|
db = DatabaseManager()
|
||||||
|
conn = db.conn
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# Clear existing data
|
||||||
|
for table in ['apps', 'articles', 'categories', 'sponsors']:
|
||||||
|
cursor.execute(f"DELETE FROM {table}")
|
||||||
|
|
||||||
|
# Categories
|
||||||
|
categories = [
|
||||||
|
("Browser Automation", "⚙", "Tools for browser automation and control"),
|
||||||
|
("Proxy Services", "🔒", "Proxy providers and rotation services"),
|
||||||
|
("LLM Integration", "🤖", "AI/LLM tools and integrations"),
|
||||||
|
("Data Processing", "📊", "Data extraction and processing tools"),
|
||||||
|
("Cloud Infrastructure", "☁", "Cloud browser and computing services"),
|
||||||
|
("Developer Tools", "🛠", "Development and testing utilities")
|
||||||
|
]
|
||||||
|
|
||||||
|
for i, (name, icon, desc) in enumerate(categories):
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO categories (name, slug, icon, description, order_index)
|
||||||
|
VALUES (?, ?, ?, ?, ?)
|
||||||
|
""", (name, generate_slug(name), icon, desc, i))
|
||||||
|
|
||||||
|
# Apps with real Unsplash images
|
||||||
|
apps_data = [
|
||||||
|
# Browser Automation
|
||||||
|
("Playwright Cloud", "Browser Automation", "Paid", True, True,
|
||||||
|
"Scalable browser automation in the cloud with Playwright", "https://playwright.cloud",
|
||||||
|
None, "$99/month starter", 4.8, 12500,
|
||||||
|
"https://images.unsplash.com/photo-1633356122544-f134324a6cee?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Selenium Grid Hub", "Browser Automation", "Freemium", False, False,
|
||||||
|
"Distributed Selenium grid for parallel testing", "https://seleniumhub.io",
|
||||||
|
"https://github.com/seleniumhub/grid", "Free - $299/month", 4.2, 8400,
|
||||||
|
"https://images.unsplash.com/photo-1555066931-4365d14bab8c?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Puppeteer Extra", "Browser Automation", "Open Source", True, False,
|
||||||
|
"Enhanced Puppeteer with stealth plugins and more", "https://puppeteer-extra.dev",
|
||||||
|
"https://github.com/berstend/puppeteer-extra", "Free", 4.6, 15200,
|
||||||
|
"https://images.unsplash.com/photo-1461749280684-dccba630e2f6?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
# Proxy Services
|
||||||
|
("BrightData", "Proxy Services", "Paid", True, True,
|
||||||
|
"Premium proxy network with 72M+ IPs worldwide", "https://brightdata.com",
|
||||||
|
None, "Starting $500/month", 4.7, 9800,
|
||||||
|
"https://images.unsplash.com/photo-1558494949-ef010cbdcc31?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("SmartProxy", "Proxy Services", "Paid", False, True,
|
||||||
|
"Residential and datacenter proxies with rotation", "https://smartproxy.com",
|
||||||
|
None, "Starting $75/month", 4.3, 7600,
|
||||||
|
"https://images.unsplash.com/photo-1544197150-b99a580bb7a8?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("ProxyMesh", "Proxy Services", "Freemium", False, False,
|
||||||
|
"Rotating proxy servers with sticky sessions", "https://proxymesh.com",
|
||||||
|
None, "$10-$50/month", 4.0, 4200,
|
||||||
|
"https://images.unsplash.com/photo-1451187580459-43490279c0fa?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
# LLM Integration
|
||||||
|
("LangChain Crawl", "LLM Integration", "Open Source", True, False,
|
||||||
|
"LangChain integration for Crawl4AI workflows", "https://langchain-crawl.dev",
|
||||||
|
"https://github.com/langchain/crawl", "Free", 4.5, 18900,
|
||||||
|
"https://images.unsplash.com/photo-1677442136019-21780ecad995?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("GPT Scraper", "LLM Integration", "Freemium", False, False,
|
||||||
|
"Extract structured data using GPT models", "https://gptscraper.ai",
|
||||||
|
None, "Free - $99/month", 4.1, 5600,
|
||||||
|
"https://images.unsplash.com/photo-1655720828018-edd2daec9349?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Claude Extract", "LLM Integration", "Paid", True, True,
|
||||||
|
"Professional extraction using Claude AI", "https://claude-extract.com",
|
||||||
|
None, "$199/month", 4.9, 3200,
|
||||||
|
"https://images.unsplash.com/photo-1686191128892-3b09ad503b4f?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
# Data Processing
|
||||||
|
("DataMiner Pro", "Data Processing", "Paid", False, False,
|
||||||
|
"Advanced data extraction and transformation", "https://dataminer.pro",
|
||||||
|
None, "$149/month", 4.2, 6700,
|
||||||
|
"https://images.unsplash.com/photo-1551288049-bebda4e38f71?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("ScraperAPI", "Data Processing", "Freemium", True, True,
|
||||||
|
"Simple API for web scraping with proxy rotation", "https://scraperapi.com",
|
||||||
|
None, "Free - $299/month", 4.6, 22300,
|
||||||
|
"https://images.unsplash.com/photo-1460925895917-afdab827c52f?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Apify", "Data Processing", "Freemium", False, False,
|
||||||
|
"Web scraping and automation platform", "https://apify.com",
|
||||||
|
None, "$49-$499/month", 4.4, 14500,
|
||||||
|
"https://images.unsplash.com/photo-1504639725590-34d0984388bd?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
# Cloud Infrastructure
|
||||||
|
("BrowserCloud", "Cloud Infrastructure", "Paid", True, True,
|
||||||
|
"Managed headless browsers in the cloud", "https://browsercloud.io",
|
||||||
|
None, "$199/month", 4.5, 8900,
|
||||||
|
"https://images.unsplash.com/photo-1667372393119-3d4c48d07fc9?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("LambdaTest", "Cloud Infrastructure", "Freemium", False, False,
|
||||||
|
"Cross-browser testing on cloud", "https://lambdatest.com",
|
||||||
|
None, "Free - $99/month", 4.1, 11200,
|
||||||
|
"https://images.unsplash.com/photo-1451187580459-43490279c0fa?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Browserless", "Cloud Infrastructure", "Freemium", True, False,
|
||||||
|
"Headless browser automation API", "https://browserless.io",
|
||||||
|
None, "$50-$500/month", 4.7, 19800,
|
||||||
|
"https://images.unsplash.com/photo-1639762681485-074b7f938ba0?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
# Developer Tools
|
||||||
|
("Crawl4AI VSCode", "Developer Tools", "Open Source", True, False,
|
||||||
|
"VSCode extension for Crawl4AI development", "https://marketplace.visualstudio.com",
|
||||||
|
"https://github.com/crawl4ai/vscode", "Free", 4.8, 34500,
|
||||||
|
"https://images.unsplash.com/photo-1629654297299-c8506221ca97?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Postman Collection", "Developer Tools", "Open Source", False, False,
|
||||||
|
"Postman collection for Crawl4AI API testing", "https://postman.com/crawl4ai",
|
||||||
|
"https://github.com/crawl4ai/postman", "Free", 4.3, 7800,
|
||||||
|
"https://images.unsplash.com/photo-1599507593499-a3f7d7d97667?w=800&h=400&fit=crop"),
|
||||||
|
|
||||||
|
("Debug Toolkit", "Developer Tools", "Open Source", False, False,
|
||||||
|
"Debugging tools for crawler development", "https://debug.crawl4ai.com",
|
||||||
|
"https://github.com/crawl4ai/debug", "Free", 4.0, 4300,
|
||||||
|
"https://images.unsplash.com/photo-1515879218367-8466d910aaa4?w=800&h=400&fit=crop"),
|
||||||
|
]
|
||||||
|
|
||||||
|
for name, category, type_, featured, sponsored, desc, url, github, pricing, rating, downloads, image in apps_data:
|
||||||
|
screenshots = json.dumps([
|
||||||
|
f"https://images.unsplash.com/photo-{random.randint(1500000000000, 1700000000000)}-{random.randint(1000000000000, 9999999999999)}?w=800&h=600&fit=crop",
|
||||||
|
f"https://images.unsplash.com/photo-{random.randint(1500000000000, 1700000000000)}-{random.randint(1000000000000, 9999999999999)}?w=800&h=600&fit=crop"
|
||||||
|
])
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO apps (name, slug, description, category, type, featured, sponsored,
|
||||||
|
website_url, github_url, pricing, rating, downloads, image, screenshots, logo_url,
|
||||||
|
integration_guide, contact_email, views)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""", (name, generate_slug(name), desc, category, type_, featured, sponsored,
|
||||||
|
url, github, pricing, rating, downloads, image, screenshots,
|
||||||
|
f"https://ui-avatars.com/api/?name={name}&background=50ffff&color=070708&size=128",
|
||||||
|
f"# {name} Integration\n\n```python\nfrom crawl4ai import AsyncWebCrawler\n# Integration code coming soon...\n```",
|
||||||
|
f"contact@{generate_slug(name)}.com",
|
||||||
|
random.randint(100, 5000)))
|
||||||
|
|
||||||
|
# Articles with real images
|
||||||
|
articles_data = [
|
||||||
|
("Browser Automation Showdown: Playwright vs Puppeteer vs Selenium",
|
||||||
|
"Review", "John Doe", ["Playwright Cloud", "Puppeteer Extra"],
|
||||||
|
["browser-automation", "comparison", "2024"],
|
||||||
|
"https://images.unsplash.com/photo-1587620962725-abab7fe55159?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("Top 5 Proxy Services for Web Scraping in 2024",
|
||||||
|
"Comparison", "Jane Smith", ["BrightData", "SmartProxy", "ProxyMesh"],
|
||||||
|
["proxy", "web-scraping", "guide"],
|
||||||
|
"https://images.unsplash.com/photo-1558494949-ef010cbdcc31?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("Integrating LLMs with Crawl4AI: A Complete Guide",
|
||||||
|
"Tutorial", "Crawl4AI Team", ["LangChain Crawl", "GPT Scraper", "Claude Extract"],
|
||||||
|
["llm", "integration", "tutorial"],
|
||||||
|
"https://images.unsplash.com/photo-1677442136019-21780ecad995?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("Building Scalable Crawlers with Cloud Infrastructure",
|
||||||
|
"Tutorial", "Mike Johnson", ["BrowserCloud", "Browserless"],
|
||||||
|
["cloud", "scalability", "architecture"],
|
||||||
|
"https://images.unsplash.com/photo-1667372393119-3d4c48d07fc9?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("What's New in Crawl4AI Marketplace",
|
||||||
|
"News", "Crawl4AI Team", [],
|
||||||
|
["marketplace", "announcement", "news"],
|
||||||
|
"https://images.unsplash.com/photo-1556075798-4825dfaaf498?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("Cost Analysis: Self-Hosted vs Cloud Browser Solutions",
|
||||||
|
"Comparison", "Sarah Chen", ["BrowserCloud", "LambdaTest", "Browserless"],
|
||||||
|
["cost", "cloud", "comparison"],
|
||||||
|
"https://images.unsplash.com/photo-1554224155-8d04cb21cd6c?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("Getting Started with Browser Automation",
|
||||||
|
"Tutorial", "Crawl4AI Team", ["Playwright Cloud", "Selenium Grid Hub"],
|
||||||
|
["beginner", "tutorial", "automation"],
|
||||||
|
"https://images.unsplash.com/photo-1498050108023-c5249f4df085?w=1200&h=630&fit=crop"),
|
||||||
|
|
||||||
|
("The Future of Web Scraping: AI-Powered Extraction",
|
||||||
|
"News", "Dr. Alan Turing", ["Claude Extract", "GPT Scraper"],
|
||||||
|
["ai", "future", "trends"],
|
||||||
|
"https://images.unsplash.com/photo-1593720213428-28a5b9e94613?w=1200&h=630&fit=crop")
|
||||||
|
]
|
||||||
|
|
||||||
|
for title, category, author, related_apps, tags, image in articles_data:
|
||||||
|
# Get app IDs for related apps
|
||||||
|
related_ids = []
|
||||||
|
for app_name in related_apps:
|
||||||
|
cursor.execute("SELECT id FROM apps WHERE name = ?", (app_name,))
|
||||||
|
result = cursor.fetchone()
|
||||||
|
if result:
|
||||||
|
related_ids.append(result[0])
|
||||||
|
|
||||||
|
content = f"""# {title}
|
||||||
|
|
||||||
|
By {author} | {datetime.now().strftime('%B %d, %Y')}
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
This is a comprehensive article about {title.lower()}. Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
||||||
|
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
|
||||||
|
|
||||||
|
## Key Points
|
||||||
|
|
||||||
|
- Important point about the topic
|
||||||
|
- Another crucial insight
|
||||||
|
- Technical details and specifications
|
||||||
|
- Performance comparisons
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
In summary, this article explored various aspects of the topic. Stay tuned for more updates!
|
||||||
|
"""
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO articles (title, slug, content, author, category, related_apps,
|
||||||
|
featured_image, tags, views)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""", (title, generate_slug(title), content, author, category,
|
||||||
|
json.dumps(related_ids), image, json.dumps(tags),
|
||||||
|
random.randint(200, 10000)))
|
||||||
|
|
||||||
|
# Sponsors
|
||||||
|
sponsors_data = [
|
||||||
|
("BrightData", "Gold", "https://brightdata.com",
|
||||||
|
"https://images.unsplash.com/photo-1558494949-ef010cbdcc31?w=728&h=90&fit=crop"),
|
||||||
|
("ScraperAPI", "Gold", "https://scraperapi.com",
|
||||||
|
"https://images.unsplash.com/photo-1460925895917-afdab827c52f?w=728&h=90&fit=crop"),
|
||||||
|
("BrowserCloud", "Silver", "https://browsercloud.io",
|
||||||
|
"https://images.unsplash.com/photo-1667372393119-3d4c48d07fc9?w=728&h=90&fit=crop"),
|
||||||
|
("Claude Extract", "Silver", "https://claude-extract.com",
|
||||||
|
"https://images.unsplash.com/photo-1686191128892-3b09ad503b4f?w=728&h=90&fit=crop"),
|
||||||
|
("SmartProxy", "Bronze", "https://smartproxy.com",
|
||||||
|
"https://images.unsplash.com/photo-1544197150-b99a580bb7a8?w=728&h=90&fit=crop")
|
||||||
|
]
|
||||||
|
|
||||||
|
for company, tier, landing_url, banner in sponsors_data:
|
||||||
|
start_date = datetime.now() - timedelta(days=random.randint(1, 30))
|
||||||
|
end_date = datetime.now() + timedelta(days=random.randint(30, 180))
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO sponsors (company_name, logo_url, tier, banner_url,
|
||||||
|
landing_url, active, start_date, end_date)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""", (company,
|
||||||
|
f"https://ui-avatars.com/api/?name={company}&background=09b5a5&color=fff&size=200",
|
||||||
|
tier, banner, landing_url, 1,
|
||||||
|
start_date.isoformat(), end_date.isoformat()))
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
print("✓ Dummy data generated successfully!")
|
||||||
|
print(f" - {len(categories)} categories")
|
||||||
|
print(f" - {len(apps_data)} apps")
|
||||||
|
print(f" - {len(articles_data)} articles")
|
||||||
|
print(f" - {len(sponsors_data)} sponsors")
|
||||||
|
|
||||||
|
# Script entry point: seed the database only when this file is executed
# directly, never when it is imported.
if __name__ == "__main__":
    generate_dummy_data()
|
||||||
5
docs/md_v2/marketplace/backend/requirements.txt
Normal file
5
docs/md_v2/marketplace/backend/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
pyyaml
|
||||||
|
python-multipart
|
||||||
|
python-dotenv
|
||||||
75
docs/md_v2/marketplace/backend/schema.yaml
Normal file
75
docs/md_v2/marketplace/backend/schema.yaml
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
database:
|
||||||
|
name: marketplace.db
|
||||||
|
|
||||||
|
tables:
|
||||||
|
apps:
|
||||||
|
columns:
|
||||||
|
id: {type: INTEGER, primary: true, autoincrement: true}
|
||||||
|
name: {type: TEXT, required: true}
|
||||||
|
slug: {type: TEXT, unique: true}
|
||||||
|
description: {type: TEXT}
|
||||||
|
long_description: {type: TEXT}
|
||||||
|
logo_url: {type: TEXT}
|
||||||
|
image: {type: TEXT}
|
||||||
|
screenshots: {type: JSON, default: '[]'}
|
||||||
|
category: {type: TEXT}
|
||||||
|
type: {type: TEXT, default: 'Open Source'}
|
||||||
|
status: {type: TEXT, default: 'Active'}
|
||||||
|
website_url: {type: TEXT}
|
||||||
|
github_url: {type: TEXT}
|
||||||
|
demo_url: {type: TEXT}
|
||||||
|
video_url: {type: TEXT}
|
||||||
|
documentation_url: {type: TEXT}
|
||||||
|
support_url: {type: TEXT}
|
||||||
|
discord_url: {type: TEXT}
|
||||||
|
pricing: {type: TEXT}
|
||||||
|
rating: {type: REAL, default: 0.0}
|
||||||
|
downloads: {type: INTEGER, default: 0}
|
||||||
|
featured: {type: BOOLEAN, default: 0}
|
||||||
|
sponsored: {type: BOOLEAN, default: 0}
|
||||||
|
integration_guide: {type: TEXT}
|
||||||
|
documentation: {type: TEXT}
|
||||||
|
examples: {type: TEXT}
|
||||||
|
installation_command: {type: TEXT}
|
||||||
|
requirements: {type: TEXT}
|
||||||
|
changelog: {type: TEXT}
|
||||||
|
tags: {type: JSON, default: '[]'}
|
||||||
|
added_date: {type: DATETIME, default: CURRENT_TIMESTAMP}
|
||||||
|
updated_date: {type: DATETIME, default: CURRENT_TIMESTAMP}
|
||||||
|
contact_email: {type: TEXT}
|
||||||
|
views: {type: INTEGER, default: 0}
|
||||||
|
|
||||||
|
articles:
|
||||||
|
columns:
|
||||||
|
id: {type: INTEGER, primary: true, autoincrement: true}
|
||||||
|
title: {type: TEXT, required: true}
|
||||||
|
slug: {type: TEXT, unique: true}
|
||||||
|
content: {type: TEXT}
|
||||||
|
author: {type: TEXT, default: 'Crawl4AI Team'}
|
||||||
|
category: {type: TEXT}
|
||||||
|
related_apps: {type: JSON, default: '[]'}
|
||||||
|
featured_image: {type: TEXT}
|
||||||
|
published_date: {type: DATETIME, default: CURRENT_TIMESTAMP}
|
||||||
|
tags: {type: JSON, default: '[]'}
|
||||||
|
views: {type: INTEGER, default: 0}
|
||||||
|
|
||||||
|
categories:
|
||||||
|
columns:
|
||||||
|
id: {type: INTEGER, primary: true, autoincrement: true}
|
||||||
|
name: {type: TEXT, unique: true}
|
||||||
|
slug: {type: TEXT, unique: true}
|
||||||
|
icon: {type: TEXT}
|
||||||
|
description: {type: TEXT}
|
||||||
|
order_index: {type: INTEGER, default: 0}
|
||||||
|
|
||||||
|
sponsors:
|
||||||
|
columns:
|
||||||
|
id: {type: INTEGER, primary: true, autoincrement: true}
|
||||||
|
company_name: {type: TEXT, required: true}
|
||||||
|
logo_url: {type: TEXT}
|
||||||
|
tier: {type: TEXT, default: 'Bronze'}
|
||||||
|
banner_url: {type: TEXT}
|
||||||
|
landing_url: {type: TEXT}
|
||||||
|
active: {type: BOOLEAN, default: 1}
|
||||||
|
start_date: {type: DATETIME}
|
||||||
|
end_date: {type: DATETIME}
|
||||||
497
docs/md_v2/marketplace/backend/server.py
Normal file
497
docs/md_v2/marketplace/backend/server.py
Normal file
@@ -0,0 +1,497 @@
|
|||||||
|
from fastapi import FastAPI, HTTPException, Query, Depends, Body, UploadFile, File, Form, APIRouter
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
import json
|
||||||
|
import hashlib
|
||||||
|
import secrets
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from database import DatabaseManager
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
# Import configuration (will exit if .env not found or invalid)
|
||||||
|
from config import Config
|
||||||
|
|
||||||
|
# FastAPI application plus the router that carries every marketplace route.
app = FastAPI(title="Crawl4AI Marketplace API")
router = APIRouter(prefix="/marketplace/api")

# Security setup
# Bearer scheme used as a dependency by the admin endpoints.
security = HTTPBearer()
# Issued admin tokens mapped to their expiry datetime. Held in process memory
# only. In production, use Redis or database for token storage.
tokens = {}

# CORS configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=Config.ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    max_age=3600,
)

# Initialize database with configurable path
db = DatabaseManager(Config.DATABASE_PATH)

# Uploaded assets live next to this file and are served under /uploads.
BASE_DIR = Path(__file__).parent
UPLOAD_ROOT = BASE_DIR / "uploads"
UPLOAD_ROOT.mkdir(parents=True, exist_ok=True)

app.mount("/uploads", StaticFiles(directory=UPLOAD_ROOT), name="uploads")

# Accepted upload MIME types mapped to the extension used when saving.
ALLOWED_IMAGE_TYPES = {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/webp": ".webp",
    "image/svg+xml": ".svg",
}
# Sub-folders of UPLOAD_ROOT that uploads may target.
ALLOWED_UPLOAD_FOLDERS = {"sponsors"}
MAX_UPLOAD_SIZE = 2 * 1024 ** 2  # 2 MB
|
||||||
|
|
||||||
|
def json_response(data, cache_time=3600):
    """Return *data* as a JSONResponse with public caching headers.

    Args:
        data: JSON-serialisable payload.
        cache_time: Value, in seconds, placed in ``Cache-Control: max-age``.
    """
    response_headers = {
        "Cache-Control": f"public, max-age={cache_time}",
        "X-Content-Type-Options": "nosniff",
    }
    return JSONResponse(content=data, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
|
def to_int(value, default=0):
    """Coerce an incoming value to an integer, falling back to ``default``.

    Rules:
        * ``None`` -> ``default``.
        * ``bool`` / ``int`` / ``float`` -> ``int(value)`` (floats truncate
          toward zero). NaN and infinities cannot be converted and yield
          ``default`` instead of raising.
        * ``str`` -> the leading, optionally negative, run of digits
          (``"42px"`` -> 42, ``"3.7"`` -> 3). Blank or non-numeric strings
          yield ``default``.
        * Any other type -> ``default``.
    """
    if value is None:
        return default

    if isinstance(value, (bool, int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError and int(+/-inf) raises OverflowError.
            return default

    if isinstance(value, str):
        match = re.match(r"^-?\d+", value.strip())
        if match:
            try:
                return int(match.group())
            except ValueError:
                # e.g. a digit string longer than the interpreter's
                # int-conversion limit.
                return default

    return default
|
||||||
|
|
||||||
|
# ============= PUBLIC ENDPOINTS =============
|
||||||
|
|
||||||
|
@router.get("/apps")
async def get_apps(
    category: Optional[str] = None,
    type: Optional[str] = None,
    featured: Optional[bool] = None,
    sponsored: Optional[bool] = None,
    limit: int = Query(default=20, le=10000),
    offset: int = Query(default=0)
):
    """Get apps with optional filters.

    Every supplied filter is AND-ed into the WHERE clause handed to
    ``db.get_all``. The ``screenshots`` column is decoded from JSON before
    the rows are returned.
    """

    def sql_literal(text):
        # ``db.get_all`` takes the WHERE clause as raw SQL text, so
        # caller-supplied strings must be quoted here. Doubling embedded
        # single quotes is the SQL string-literal escape and stops a value
        # from terminating the literal early (SQL injection).
        return "'" + text.replace("'", "''") + "'"

    where_clauses = []
    if category:
        where_clauses.append(f"category = {sql_literal(category)}")
    if type:
        where_clauses.append(f"type = {sql_literal(type)}")
    if featured is not None:
        where_clauses.append(f"featured = {1 if featured else 0}")
    if sponsored is not None:
        where_clauses.append(f"sponsored = {1 if sponsored else 0}")

    where = " AND ".join(where_clauses) if where_clauses else None
    apps = db.get_all('apps', limit=limit, offset=offset, where=where)

    # Parse JSON fields
    for app in apps:
        if app.get('screenshots'):
            app['screenshots'] = json.loads(app['screenshots'])

    return json_response(apps)
|
||||||
|
|
||||||
|
@router.get("/apps/{slug}")
async def get_app(slug: str):
    """Get a single app by slug.

    Raises:
        HTTPException: 404 when no app has the given slug.
    """
    # The WHERE clause is passed to ``db.get_all`` as raw SQL text, so the
    # path parameter is escaped by doubling single quotes (SQL string-literal
    # escaping) to prevent SQL injection.
    escaped_slug = slug.replace("'", "''")
    apps = db.get_all('apps', where=f"slug = '{escaped_slug}'", limit=1)
    if not apps:
        raise HTTPException(status_code=404, detail="App not found")

    app = apps[0]
    if app.get('screenshots'):
        app['screenshots'] = json.loads(app['screenshots'])

    return json_response(app)
|
||||||
|
|
||||||
|
@router.get("/articles")
async def get_articles(
    category: Optional[str] = None,
    limit: int = Query(default=20, le=10000),
    offset: int = Query(default=0)
):
    """Get articles with an optional category filter.

    The ``related_apps`` and ``tags`` columns are decoded from JSON before
    the rows are returned.
    """
    where = None
    if category:
        # The WHERE clause is raw SQL text; double embedded single quotes so
        # the query parameter cannot break out of the string literal.
        escaped_category = category.replace("'", "''")
        where = f"category = '{escaped_category}'"

    articles = db.get_all('articles', limit=limit, offset=offset, where=where)

    # Parse JSON fields
    for article in articles:
        if article.get('related_apps'):
            article['related_apps'] = json.loads(article['related_apps'])
        if article.get('tags'):
            article['tags'] = json.loads(article['tags'])

    return json_response(articles)
|
||||||
|
|
||||||
|
@router.get("/articles/{slug}")
async def get_article(slug: str):
    """Get a single article by slug.

    Raises:
        HTTPException: 404 when no article has the given slug.
    """
    # The WHERE clause is passed to ``db.get_all`` as raw SQL text, so the
    # path parameter is escaped by doubling single quotes (SQL string-literal
    # escaping) to prevent SQL injection.
    escaped_slug = slug.replace("'", "''")
    articles = db.get_all('articles', where=f"slug = '{escaped_slug}'", limit=1)
    if not articles:
        raise HTTPException(status_code=404, detail="Article not found")

    article = articles[0]
    if article.get('related_apps'):
        article['related_apps'] = json.loads(article['related_apps'])
    if article.get('tags'):
        article['tags'] = json.loads(article['tags'])

    return json_response(article)
|
||||||
|
|
||||||
|
@router.get("/categories")
async def get_categories():
    """Return up to 50 categories sorted by their integer ``order_index``."""
    rows = db.get_all('categories', limit=50)

    # Normalise order_index first so the sort compares integers only.
    for row in rows:
        row['order_index'] = to_int(row.get('order_index'), 0)

    ordered = sorted(rows, key=lambda row: row['order_index'])
    return json_response(ordered, cache_time=7200)
|
||||||
|
|
||||||
|
@router.get("/sponsors")
async def get_sponsors(active: Optional[bool] = True):
    """Return sponsors; by default only active ones inside their date window.

    ``active=None`` disables the ``active`` column filter. The start/end date
    check is applied only when ``active`` is truthy.
    """
    if active is None:
        where = None
    else:
        where = f"active = {1 if active else 0}"

    sponsors = db.get_all('sponsors', where=where, limit=20)
    if not active:
        return json_response(sponsors)

    # Dates are compared as ISO-format strings.
    now = datetime.now().isoformat()

    def in_window(sponsor):
        started = not sponsor.get('start_date') or sponsor['start_date'] <= now
        not_ended = not sponsor.get('end_date') or sponsor['end_date'] >= now
        return started and not_ended

    current = [sponsor for sponsor in sponsors if in_window(sponsor)]
    return json_response(current)
|
||||||
|
|
||||||
|
@router.get("/search")
async def search(q: str = Query(min_length=2)):
    """Search apps and articles for *q* and decode their JSON columns."""
    if len(q) < 2:
        return json_response({})

    results = db.search(q, tables=['apps', 'articles'])

    # JSON-encoded columns to decode, per result table.
    json_columns = {
        'apps': ('screenshots',),
        'articles': ('related_apps', 'tags'),
    }
    for table, items in results.items():
        columns = json_columns.get(table, ())
        for item in items:
            for column in columns:
                if item.get(column):
                    item[column] = json.loads(item[column])

    return json_response(results, cache_time=1800)
|
||||||
|
|
||||||
|
@router.get("/stats")
async def get_stats():
    """Return headline marketplace counts.

    Each count is the length of a capped ``db.get_all`` result, so it cannot
    exceed the ``limit`` passed for that table.
    """
    app_rows = db.get_all('apps', limit=10000)
    article_rows = db.get_all('articles', limit=10000)
    category_rows = db.get_all('categories', limit=1000)
    active_sponsor_rows = db.get_all('sponsors', where="active = 1", limit=1000)

    stats = {
        "total_apps": len(app_rows),
        "total_articles": len(article_rows),
        "total_categories": len(category_rows),
        "active_sponsors": len(active_sponsor_rows),
    }
    return json_response(stats, cache_time=1800)
|
||||||
|
|
||||||
|
# ============= ADMIN AUTHENTICATION =============
|
||||||
|
|
||||||
|
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Verify the admin bearer token.

    Returns:
        The token string when it is known and has not expired.

    Raises:
        HTTPException: 401 when the token is unknown or past its expiry.
    """
    token = credentials.credentials
    expires_at = tokens.get(token)
    if expires_at is None or expires_at < datetime.now():
        # Drop an expired entry so stale tokens do not accumulate in the
        # in-memory store; a no-op when the token was never issued.
        tokens.pop(token, None)
        raise HTTPException(status_code=401, detail="Invalid or expired token")
    return token
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/admin/upload-image", dependencies=[Depends(verify_token)])
async def upload_image(file: UploadFile = File(...), folder: str = Form("sponsors")):
    """Upload an image file for admin assets.

    Args:
        file: The uploaded image; its declared content type must be a key of
            ``ALLOWED_IMAGE_TYPES``.
        folder: Target sub-folder; must be in ``ALLOWED_UPLOAD_FOLDERS``.

    Returns:
        ``{"url": "/uploads/<folder>/<generated filename>"}``.

    Raises:
        HTTPException: 400 for an invalid folder, an unsupported content
            type, or a file larger than ``MAX_UPLOAD_SIZE``.
    """
    folder = (folder or "").strip().lower()
    if folder not in ALLOWED_UPLOAD_FOLDERS:
        raise HTTPException(status_code=400, detail="Invalid upload folder")

    # NOTE(review): only the client-declared content type is checked, and SVG
    # files can carry scripts; consider sanitising or dropping SVG support.
    if file.content_type not in ALLOWED_IMAGE_TYPES:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    contents = await file.read(MAX_UPLOAD_SIZE + 1)
    if len(contents) > MAX_UPLOAD_SIZE:
        raise HTTPException(status_code=400, detail="File too large (max 2MB)")

    # The stored name is generated server-side (timestamp + random hex), so
    # the client-supplied filename never reaches the filesystem.
    extension = ALLOWED_IMAGE_TYPES[file.content_type]
    filename = f"{datetime.now().strftime('%Y%m%d%H%M%S')}_{secrets.token_hex(8)}{extension}"

    target_dir = UPLOAD_ROOT / folder
    target_dir.mkdir(parents=True, exist_ok=True)
    target_path = target_dir / filename
    target_path.write_bytes(contents)

    return {"url": f"/uploads/{folder}/{filename}"}
|
||||||
|
|
||||||
|
@router.post("/admin/login")
async def admin_login(password: str = Body(..., embed=True)):
    """Admin login with password.

    Returns:
        ``{"token": <bearer token>, "expires_in": <lifetime in seconds>}``.

    Raises:
        HTTPException: 401 when the password's SHA-256 hash does not match
            ``Config.ADMIN_PASSWORD_HASH``.
    """
    provided_hash = hashlib.sha256(password.encode()).hexdigest()
    expected_hash = str(Config.ADMIN_PASSWORD_HASH)

    # Constant-time comparison avoids leaking, through response timing, how
    # many leading characters of the hash matched.
    if not secrets.compare_digest(provided_hash.encode(), expected_hash.encode()):
        # Log failed attempt in production
        print(f"Failed login attempt at {datetime.now()}")
        raise HTTPException(status_code=401, detail="Invalid password")

    # Purge tokens that have already expired so the in-memory store does not
    # grow without bound across logins.
    now = datetime.now()
    for stale_token in [t for t, expiry in tokens.items() if expiry < now]:
        tokens.pop(stale_token, None)

    # Generate secure token
    token = secrets.token_urlsafe(32)
    tokens[token] = datetime.now() + timedelta(hours=Config.TOKEN_EXPIRY_HOURS)

    return {
        "token": token,
        "expires_in": Config.TOKEN_EXPIRY_HOURS * 3600
    }
|
||||||
|
|
||||||
|
# ============= ADMIN ENDPOINTS =============
|
||||||
|
|
||||||
|
@router.get("/admin/stats", dependencies=[Depends(verify_token)])
async def get_admin_stats():
    """Get detailed admin statistics.

    Counts are lengths of capped ``db.get_all`` results. ``total_views`` sums
    the ``views`` value of the fetched apps, treating a missing or NULL value
    as 0.
    """
    # Fetch the apps once and reuse the rows for both the count and the view
    # total instead of querying the same table twice.
    all_apps = db.get_all('apps', limit=10000)

    stats = {
        "apps": {
            "total": len(all_apps),
            "featured": len(db.get_all('apps', where="featured = 1", limit=10000)),
            "sponsored": len(db.get_all('apps', where="sponsored = 1", limit=10000))
        },
        "articles": len(db.get_all('articles', limit=10000)),
        "categories": len(db.get_all('categories', limit=1000)),
        "sponsors": {
            "active": len(db.get_all('sponsors', where="active = 1", limit=1000)),
            "total": len(db.get_all('sponsors', limit=10000))
        },
        # ``or 0`` keeps a NULL views column from breaking the sum.
        "total_views": sum(app.get('views') or 0 for app in all_apps)
    }
    return stats
|
||||||
|
|
||||||
|
# Apps CRUD
|
||||||
|
@router.post("/admin/apps", dependencies=[Depends(verify_token)])
async def create_app(app_data: Dict[str, Any]):
    """Create a new app from a column -> value mapping.

    List values for ``screenshots`` and ``tags`` are stored as JSON text.

    Returns:
        ``{"id": <new row id>, "message": "App created"}``.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not app_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in app_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        # Handle JSON fields
        for field in ['screenshots', 'tags']:
            if field in app_data and isinstance(app_data[field], list):
                app_data[field] = json.dumps(app_data[field])

        cursor = db.conn.cursor()
        columns = ', '.join(app_data.keys())
        placeholders = ', '.join(['?' for _ in app_data])
        cursor.execute(f"INSERT INTO apps ({columns}) VALUES ({placeholders})",
                       list(app_data.values()))
        db.conn.commit()
        return {"id": cursor.lastrowid, "message": "App created"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.put("/admin/apps/{app_id}", dependencies=[Depends(verify_token)])
async def update_app(app_id: int, app_data: Dict[str, Any]):
    """Update the app with id ``app_id`` from a column -> value mapping.

    List values for ``screenshots`` and ``tags`` are stored as JSON text.
    The success message is returned whether or not a row matched ``app_id``.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not app_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in app_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        # Handle JSON fields
        for field in ['screenshots', 'tags']:
            if field in app_data and isinstance(app_data[field], list):
                app_data[field] = json.dumps(app_data[field])

        set_clause = ', '.join([f"{k} = ?" for k in app_data.keys()])
        cursor = db.conn.cursor()
        cursor.execute(f"UPDATE apps SET {set_clause} WHERE id = ?",
                       list(app_data.values()) + [app_id])
        db.conn.commit()
        return {"message": "App updated"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.delete("/admin/apps/{app_id}", dependencies=[Depends(verify_token)])
async def delete_app(app_id: int):
    """Remove the app row whose id is ``app_id`` and commit the change.

    The same message is returned whether or not a row matched.
    """
    delete_sql = "DELETE FROM apps WHERE id = ?"
    cur = db.conn.cursor()
    cur.execute(delete_sql, (app_id,))
    db.conn.commit()
    return {"message": "App deleted"}
|
||||||
|
|
||||||
|
# Articles CRUD
|
||||||
|
@router.post("/admin/articles", dependencies=[Depends(verify_token)])
async def create_article(article_data: Dict[str, Any]):
    """Create a new article from a column -> value mapping.

    List values for ``related_apps`` and ``tags`` are stored as JSON text.

    Returns:
        ``{"id": <new row id>, "message": "Article created"}``.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not article_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in article_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        for field in ['related_apps', 'tags']:
            if field in article_data and isinstance(article_data[field], list):
                article_data[field] = json.dumps(article_data[field])

        cursor = db.conn.cursor()
        columns = ', '.join(article_data.keys())
        placeholders = ', '.join(['?' for _ in article_data])
        cursor.execute(f"INSERT INTO articles ({columns}) VALUES ({placeholders})",
                       list(article_data.values()))
        db.conn.commit()
        return {"id": cursor.lastrowid, "message": "Article created"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.put("/admin/articles/{article_id}", dependencies=[Depends(verify_token)])
async def update_article(article_id: int, article_data: Dict[str, Any]):
    """Update the article with id ``article_id`` from a column -> value mapping.

    List values for ``related_apps`` and ``tags`` are stored as JSON text.
    The success message is returned whether or not a row matched.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not article_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in article_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        for field in ['related_apps', 'tags']:
            if field in article_data and isinstance(article_data[field], list):
                article_data[field] = json.dumps(article_data[field])

        set_clause = ', '.join([f"{k} = ?" for k in article_data.keys()])
        cursor = db.conn.cursor()
        cursor.execute(f"UPDATE articles SET {set_clause} WHERE id = ?",
                       list(article_data.values()) + [article_id])
        db.conn.commit()
        return {"message": "Article updated"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.delete("/admin/articles/{article_id}", dependencies=[Depends(verify_token)])
async def delete_article(article_id: int):
    """Remove the article row whose id is ``article_id`` and commit.

    The same message is returned whether or not a row matched.
    """
    delete_sql = "DELETE FROM articles WHERE id = ?"
    cur = db.conn.cursor()
    cur.execute(delete_sql, (article_id,))
    db.conn.commit()
    return {"message": "Article deleted"}
|
||||||
|
|
||||||
|
# Categories CRUD
|
||||||
|
@router.post("/admin/categories", dependencies=[Depends(verify_token)])
async def create_category(category_data: Dict[str, Any]):
    """Create a new category from a column -> value mapping.

    ``order_index`` is always written, coerced to an integer (0 when absent
    or not numeric).

    Returns:
        ``{"id": <new row id>, "message": "Category created"}``.

    Raises:
        HTTPException: 400 for an invalid field name or any database error.
    """
    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in category_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        # Work on a copy so the caller's mapping is not modified.
        category_data = dict(category_data)
        category_data['order_index'] = to_int(category_data.get('order_index'), 0)

        cursor = db.conn.cursor()
        columns = ', '.join(category_data.keys())
        placeholders = ', '.join(['?' for _ in category_data])
        cursor.execute(f"INSERT INTO categories ({columns}) VALUES ({placeholders})",
                       list(category_data.values()))
        db.conn.commit()
        return {"id": cursor.lastrowid, "message": "Category created"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.put("/admin/categories/{cat_id}", dependencies=[Depends(verify_token)])
async def update_category(cat_id: int, category_data: Dict[str, Any]):
    """Update the category with id ``cat_id`` from a column -> value mapping.

    ``order_index`` is coerced to an integer only when it is present in the
    payload. The success message is returned whether or not a row matched.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not category_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in category_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        # Work on a copy so the caller's mapping is not modified.
        category_data = dict(category_data)
        if 'order_index' in category_data:
            category_data['order_index'] = to_int(category_data.get('order_index'), 0)

        set_clause = ', '.join([f"{k} = ?" for k in category_data.keys()])
        cursor = db.conn.cursor()
        cursor.execute(f"UPDATE categories SET {set_clause} WHERE id = ?",
                       list(category_data.values()) + [cat_id])
        db.conn.commit()
        return {"message": "Category updated"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/admin/categories/{cat_id}", dependencies=[Depends(verify_token)])
async def delete_category(cat_id: int):
    """Remove the category row whose id is ``cat_id`` and commit.

    Any failure is reported to the client as HTTP 400 with the error text.
    """
    delete_sql = "DELETE FROM categories WHERE id = ?"
    try:
        cur = db.conn.cursor()
        cur.execute(delete_sql, (cat_id,))
        db.conn.commit()
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    return {"message": "Category deleted"}
|
||||||
|
|
||||||
|
# Sponsors CRUD
|
||||||
|
@router.post("/admin/sponsors", dependencies=[Depends(verify_token)])
async def create_sponsor(sponsor_data: Dict[str, Any]):
    """Create a new sponsor from a column -> value mapping.

    Returns:
        ``{"id": <new row id>, "message": "Sponsor created"}``.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not sponsor_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in sponsor_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        cursor = db.conn.cursor()
        columns = ', '.join(sponsor_data.keys())
        placeholders = ', '.join(['?' for _ in sponsor_data])
        cursor.execute(f"INSERT INTO sponsors ({columns}) VALUES ({placeholders})",
                       list(sponsor_data.values()))
        db.conn.commit()
        return {"id": cursor.lastrowid, "message": "Sponsor created"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.put("/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)])
async def update_sponsor(sponsor_id: int, sponsor_data: Dict[str, Any]):
    """Update the sponsor with id ``sponsor_id`` from a column -> value mapping.

    The success message is returned whether or not a row matched.

    Raises:
        HTTPException: 400 for an empty payload, an invalid field name, or
            any database error.
    """
    if not sponsor_data:
        raise HTTPException(status_code=400, detail="No fields provided")

    # Column names are spliced into the SQL text (only values can be bound as
    # parameters), so accept nothing but plain identifiers.
    invalid = [str(key) for key in sponsor_data
               if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", str(key))]
    if invalid:
        raise HTTPException(status_code=400,
                            detail=f"Invalid field name(s): {', '.join(invalid)}")

    try:
        set_clause = ', '.join([f"{k} = ?" for k in sponsor_data.keys()])
        cursor = db.conn.cursor()
        cursor.execute(f"UPDATE sponsors SET {set_clause} WHERE id = ?",
                       list(sponsor_data.values()) + [sponsor_id])
        db.conn.commit()
        return {"message": "Sponsor updated"}
    except Exception as e:
        # Leave no half-open transaction on the shared connection.
        db.conn.rollback()
        raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/admin/sponsors/{sponsor_id}", dependencies=[Depends(verify_token)])
async def delete_sponsor(sponsor_id: int):
    """Remove the sponsor row whose id is ``sponsor_id`` and commit.

    Any failure is reported to the client as HTTP 400 with the error text.
    """
    delete_sql = "DELETE FROM sponsors WHERE id = ?"
    try:
        cur = db.conn.cursor()
        cur.execute(delete_sql, (sponsor_id,))
        db.conn.commit()
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    return {"message": "Sponsor deleted"}
|
||||||
|
|
||||||
|
# Attach every route registered on the router to the application.
app.include_router(router)

# Version info
VERSION = "1.1.0"
BUILD_DATE = "2025-10-26"


@app.get("/")
async def root():
    """Describe the API: name, version, build date and public endpoints."""
    public_endpoints = [
        "/marketplace/api/apps",
        "/marketplace/api/articles",
        "/marketplace/api/categories",
        "/marketplace/api/sponsors",
        "/marketplace/api/search?q=query",
        "/marketplace/api/stats",
    ]
    return {
        "name": "Crawl4AI Marketplace API",
        "version": VERSION,
        "build_date": BUILD_DATE,
        "endpoints": public_endpoints,
    }
|
||||||
|
|
||||||
|
# Development entry point: serve the API on localhost port 8100 when this
# file is run as a script.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed for direct execution.
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8100)
|
||||||
2
docs/md_v2/marketplace/backend/uploads/.gitignore
vendored
Normal file
2
docs/md_v2/marketplace/backend/uploads/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
||||||
462
docs/md_v2/marketplace/frontend/app-detail.css
Normal file
462
docs/md_v2/marketplace/frontend/app-detail.css
Normal file
@@ -0,0 +1,462 @@
|
|||||||
|
/* App Detail Page Styles */
|
||||||
|
|
||||||
|
.app-detail-container {
|
||||||
|
min-height: 100vh;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Back Button */
|
||||||
|
.header-nav {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.back-btn {
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: all 0.2s;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.back-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
background: rgba(80, 255, 255, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* App Hero Section */
|
||||||
|
.app-hero {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-hero-content {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 2fr;
|
||||||
|
gap: 3rem;
|
||||||
|
background: linear-gradient(135deg, #1a1a2e, #0f0f1e);
|
||||||
|
border: 2px solid var(--primary-cyan);
|
||||||
|
padding: 2rem;
|
||||||
|
box-shadow: 0 0 30px rgba(80, 255, 255, 0.15),
|
||||||
|
inset 0 0 20px rgba(80, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-hero-image {
|
||||||
|
width: 100%;
|
||||||
|
height: 300px;
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05));
|
||||||
|
background-size: cover;
|
||||||
|
background-position: center;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
font-size: 4rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badges {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badge {
|
||||||
|
padding: 0.3rem 0.6rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badge.featured {
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
color: var(--bg-dark);
|
||||||
|
box-shadow: 0 2px 10px rgba(80, 255, 255, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-badge.sponsored {
|
||||||
|
background: linear-gradient(135deg, var(--warning), #ff8c00);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
box-shadow: 0 2px 10px rgba(245, 158, 11, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-hero-info h1 {
|
||||||
|
font-size: 2.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0.5rem 0;
|
||||||
|
text-shadow: 0 0 20px rgba(80, 255, 255, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-tagline {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stats */
|
||||||
|
.app-stats {
|
||||||
|
display: flex;
|
||||||
|
gap: 2rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
padding: 1rem 0;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-value {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-label {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Action Buttons */
|
||||||
|
.app-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn {
|
||||||
|
padding: 0.75rem 1.5rem;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
background: transparent;
|
||||||
|
color: var(--text-primary);
|
||||||
|
text-decoration: none;
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
cursor: pointer;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.primary {
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
color: var(--bg-dark);
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.primary:hover {
|
||||||
|
box-shadow: 0 4px 15px rgba(80, 255, 255, 0.3);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.secondary {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.secondary:hover {
|
||||||
|
background: rgba(243, 128, 245, 0.1);
|
||||||
|
box-shadow: 0 4px 15px rgba(243, 128, 245, 0.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.ghost {
|
||||||
|
border-color: var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.action-btn.ghost:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pricing */
|
||||||
|
.pricing-info {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
font-size: 1.1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.pricing-label {
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.pricing-value {
|
||||||
|
color: var(--warning);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Navigation Tabs */
|
||||||
|
.app-nav {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto 0;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
border-bottom: 2px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab {
|
||||||
|
padding: 1rem 1.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: none;
|
||||||
|
border-bottom: 2px solid transparent;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
margin-bottom: -2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab:hover {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab.active {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
border-bottom-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Content Sections */
|
||||||
|
.app-content {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-content {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab-content.active {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content {
|
||||||
|
max-width: 1200px;
|
||||||
|
padding: 2rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content h2 {
|
||||||
|
font-size: 1.8rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content h3 {
|
||||||
|
font-size: 1.3rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin: 2rem 0 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content h4 {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--accent-pink);
|
||||||
|
margin: 1.5rem 0 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
line-height: 1.6;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.docs-content code {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
padding: 0.2rem 0.4rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Code Blocks */
|
||||||
|
.code-block {
|
||||||
|
background: var(--bg-dark);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
margin: 1rem 0;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-lang {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
|
||||||
|
.copy-btn {
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.copy-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-block pre {
|
||||||
|
margin: 0;
|
||||||
|
padding: 1rem;
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.code-block code {
|
||||||
|
background: transparent;
|
||||||
|
padding: 0;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Feature Grid */
|
||||||
|
.feature-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-card {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1.5rem;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-card:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
background: rgba(80, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-card h4 {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Info Box */
|
||||||
|
.info-box {
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.05), rgba(243, 128, 245, 0.03));
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
border-left: 4px solid var(--primary-cyan);
|
||||||
|
padding: 1.5rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-box h4 {
|
||||||
|
margin-top: 0;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Support Grid */
|
||||||
|
.support-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
margin: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.support-card {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1.5rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.support-card h3 {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Related Apps */
|
||||||
|
.related-apps {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 4rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-apps h2 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-app-card {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 1rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.related-app-card:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive */
|
||||||
|
@media (max-width: 1024px) {
|
||||||
|
.app-hero-content {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-stats {
|
||||||
|
justify-content: space-around;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.app-hero-info h1 {
|
||||||
|
font-size: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-actions {
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-nav {
|
||||||
|
overflow-x: auto;
|
||||||
|
gap: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-tab {
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.feature-grid,
|
||||||
|
.support-grid {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
}
|
||||||
234
docs/md_v2/marketplace/frontend/app-detail.html
Normal file
234
docs/md_v2/marketplace/frontend/app-detail.html
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en" data-theme="dark">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>App Details - Crawl4AI Marketplace</title>
|
||||||
|
<link rel="stylesheet" href="marketplace.css">
|
||||||
|
<link rel="stylesheet" href="app-detail.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="app-detail-container">
|
||||||
|
<!-- Header -->
|
||||||
|
<header class="marketplace-header">
|
||||||
|
<div class="header-content">
|
||||||
|
<div class="header-left">
|
||||||
|
<div class="logo-title">
|
||||||
|
<img src="../../assets/images/logo.png" alt="Crawl4AI" class="header-logo">
|
||||||
|
<h1>
|
||||||
|
<span class="ascii-border">[</span>
|
||||||
|
Marketplace
|
||||||
|
<span class="ascii-border">]</span>
|
||||||
|
</h1>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="header-nav">
|
||||||
|
<a href="index.html" class="back-btn">← Back to Marketplace</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<!-- App Hero Section -->
|
||||||
|
<section class="app-hero">
|
||||||
|
<div class="app-hero-content">
|
||||||
|
<div class="app-hero-image" id="app-image">
|
||||||
|
<!-- Dynamic image -->
|
||||||
|
</div>
|
||||||
|
<div class="app-hero-info">
|
||||||
|
<div class="app-badges">
|
||||||
|
<span class="app-badge" id="app-type">Open Source</span>
|
||||||
|
<span class="app-badge featured" id="app-featured" style="display:none">FEATURED</span>
|
||||||
|
<span class="app-badge sponsored" id="app-sponsored" style="display:none">SPONSORED</span>
|
||||||
|
</div>
|
||||||
|
<h1 id="app-name">App Name</h1>
|
||||||
|
<p id="app-description" class="app-tagline">App description goes here</p>
|
||||||
|
|
||||||
|
<div class="app-stats">
|
||||||
|
<div class="stat">
|
||||||
|
<span class="stat-value" id="app-rating">★★★★★</span>
|
||||||
|
<span class="stat-label">Rating</span>
|
||||||
|
</div>
|
||||||
|
<div class="stat">
|
||||||
|
<span class="stat-value" id="app-downloads">0</span>
|
||||||
|
<span class="stat-label">Downloads</span>
|
||||||
|
</div>
|
||||||
|
<div class="stat">
|
||||||
|
<span class="stat-value" id="app-category">Category</span>
|
||||||
|
<span class="stat-label">Category</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="app-actions">
    <!-- External links open in a new tab; rel="noopener noreferrer" keeps the
         opened page from reaching back into this one through window.opener. -->
    <a href="#" id="app-website" class="action-btn primary" target="_blank" rel="noopener noreferrer">
        <span>→</span> Visit Website
    </a>
    <a href="#" id="app-github" class="action-btn secondary" target="_blank" rel="noopener noreferrer">
        <span>⚡</span> View on GitHub
    </a>
    <button id="copy-integration" class="action-btn ghost">
        <span>📋</span> Copy Integration
    </button>
</div>
|
||||||
|
|
||||||
|
<div class="pricing-info">
|
||||||
|
<span class="pricing-label">Pricing:</span>
|
||||||
|
<span id="app-pricing" class="pricing-value">Free</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Navigation Tabs -->
|
||||||
|
<nav class="app-nav">
|
||||||
|
<button class="nav-tab active" data-tab="integration">Integration Guide</button>
|
||||||
|
<button class="nav-tab" data-tab="docs">Documentation</button>
|
||||||
|
<button class="nav-tab" data-tab="examples">Examples</button>
|
||||||
|
<button class="nav-tab" data-tab="support">Support</button>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<!-- Content Sections -->
|
||||||
|
<main class="app-content">
|
||||||
|
<!-- Integration Guide Tab -->
|
||||||
|
<section id="integration-tab" class="tab-content active">
|
||||||
|
<div class="docs-content">
|
||||||
|
<h2>Quick Start</h2>
|
||||||
|
<p>Get started with this integration in just a few steps.</p>
|
||||||
|
|
||||||
|
<h3>Installation</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<div class="code-header">
|
||||||
|
<span class="code-lang">bash</span>
|
||||||
|
<button class="copy-btn">Copy</button>
|
||||||
|
</div>
|
||||||
|
<pre><code id="install-code">pip install crawl4ai</code></pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h3>Basic Usage</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<div class="code-header">
|
||||||
|
<span class="code-lang">python</span>
|
||||||
|
<button class="copy-btn">Copy</button>
|
||||||
|
</div>
|
||||||
|
<pre><code id="usage-code">from crawl4ai import AsyncWebCrawler
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
# Your configuration here
|
||||||
|
)
|
||||||
|
print(result.markdown)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import asyncio
|
||||||
|
asyncio.run(main())</code></pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h3>Advanced Configuration</h3>
|
||||||
|
<p>Customize the crawler with these advanced options:</p>
|
||||||
|
|
||||||
|
<div class="feature-grid">
|
||||||
|
<div class="feature-card">
|
||||||
|
<h4>🚀 Performance</h4>
|
||||||
|
<p>Optimize crawling speed with parallel processing and caching strategies.</p>
|
||||||
|
</div>
|
||||||
|
<div class="feature-card">
|
||||||
|
<h4>🔒 Authentication</h4>
|
||||||
|
<p>Handle login forms, cookies, and session management automatically.</p>
|
||||||
|
</div>
|
||||||
|
<div class="feature-card">
|
||||||
|
<h4>🎯 Extraction</h4>
|
||||||
|
<p>Use CSS selectors, XPath, or AI-powered content extraction.</p>
|
||||||
|
</div>
|
||||||
|
<div class="feature-card">
|
||||||
|
<h4>🔄 Proxy Support</h4>
|
||||||
|
<p>Rotate proxies and bypass rate limiting with built-in proxy management.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h3>Integration Example</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<div class="code-header">
|
||||||
|
<span class="code-lang">python</span>
|
||||||
|
<button class="copy-btn">Copy</button>
|
||||||
|
</div>
|
||||||
|
<pre><code id="integration-code">from crawl4ai import AsyncWebCrawler
|
||||||
|
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||||
|
|
||||||
|
async def extract_with_llm():
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
extraction_strategy=LLMExtractionStrategy(
|
||||||
|
provider="openai",
|
||||||
|
api_key="your-api-key",
|
||||||
|
instruction="Extract product information"
|
||||||
|
),
|
||||||
|
bypass_cache=True
|
||||||
|
)
|
||||||
|
return result.extracted_content
|
||||||
|
|
||||||
|
# Run the extraction
|
||||||
|
import asyncio
data = asyncio.run(extract_with_llm())
|
||||||
|
print(data)</code></pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="info-box">
|
||||||
|
<h4>💡 Pro Tip</h4>
|
||||||
|
<p>Use the <code>bypass_cache=True</code> parameter when you need fresh data, or set <code>cache_mode="write"</code> to update the cache with new content.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Documentation Tab -->
|
||||||
|
<section id="docs-tab" class="tab-content">
|
||||||
|
<div class="docs-content">
|
||||||
|
<h2>Documentation</h2>
|
||||||
|
<p>Complete documentation and API reference.</p>
|
||||||
|
<!-- Dynamic content loaded here -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Examples Tab -->
|
||||||
|
<section id="examples-tab" class="tab-content">
|
||||||
|
<div class="docs-content">
|
||||||
|
<h2>Examples</h2>
|
||||||
|
<p>Real-world examples and use cases.</p>
|
||||||
|
<!-- Dynamic content loaded here -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Support Tab -->
|
||||||
|
<section id="support-tab" class="tab-content">
|
||||||
|
<div class="docs-content">
|
||||||
|
<h2>Support</h2>
|
||||||
|
<div class="support-grid">
|
||||||
|
<div class="support-card">
|
||||||
|
<h3>📧 Contact</h3>
|
||||||
|
<p id="app-contact">contact@example.com</p>
|
||||||
|
</div>
|
||||||
|
<div class="support-card">
|
||||||
|
<h3>🐛 Report Issues</h3>
|
||||||
|
<p>Found a bug? Report it on GitHub Issues.</p>
|
||||||
|
</div>
|
||||||
|
<div class="support-card">
|
||||||
|
<h3>💬 Community</h3>
|
||||||
|
<p>Join our Discord for help and discussions.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<!-- Related Apps -->
|
||||||
|
<section class="related-apps">
|
||||||
|
<h2>Related Apps</h2>
|
||||||
|
<div id="related-apps-grid" class="related-grid">
|
||||||
|
<!-- Dynamic related apps -->
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="app-detail.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
334
docs/md_v2/marketplace/frontend/app-detail.js
Normal file
334
docs/md_v2/marketplace/frontend/app-detail.js
Normal file
@@ -0,0 +1,334 @@
|
|||||||
|
// App Detail Page JavaScript
|
||||||
|
// Work out where the marketplace API lives.
// When the page is opened from a local hostname on an explicit port other than
// 8100, requests go to 127.0.0.1:8100 (same protocol as the page); in every
// other case the API is addressed with a same-origin relative path.
const { API_BASE, API_ORIGIN } = (function resolveApiEndpoints() {
    const loc = window.location;
    const localHostnames = ['localhost', '127.0.0.1', '0.0.0.0'];
    const onOtherLocalPort =
        localHostnames.includes(loc.hostname) && Boolean(loc.port) && loc.port !== '8100';

    if (!onOtherLocalPort) {
        return { API_BASE: '/marketplace/api', API_ORIGIN: '' };
    }

    const backendOrigin = `${loc.protocol}//127.0.0.1:8100`;
    return { API_BASE: `${backendOrigin}/marketplace/api`, API_ORIGIN: backendOrigin };
})();
|
||||||
|
|
||||||
|
class AppDetailPage {
|
||||||
|
constructor() {
|
||||||
|
this.appSlug = this.getAppSlugFromURL();
|
||||||
|
this.appData = null;
|
||||||
|
this.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
getAppSlugFromURL() {
|
||||||
|
const params = new URLSearchParams(window.location.search);
|
||||||
|
return params.get('app') || '';
|
||||||
|
}
|
||||||
|
|
||||||
|
async init() {
|
||||||
|
if (!this.appSlug) {
|
||||||
|
window.location.href = 'index.html';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.loadAppDetails();
|
||||||
|
this.setupEventListeners();
|
||||||
|
await this.loadRelatedApps();
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadAppDetails() {
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${API_BASE}/apps/${this.appSlug}`);
|
||||||
|
if (!response.ok) throw new Error('App not found');
|
||||||
|
|
||||||
|
this.appData = await response.json();
|
||||||
|
this.renderAppDetails();
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error loading app details:', error);
|
||||||
|
// Fallback to loading all apps and finding the right one
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${API_BASE}/apps`);
|
||||||
|
const apps = await response.json();
|
||||||
|
this.appData = apps.find(app => app.slug === this.appSlug || app.name.toLowerCase().replace(/\s+/g, '-') === this.appSlug);
|
||||||
|
if (this.appData) {
|
||||||
|
this.renderAppDetails();
|
||||||
|
} else {
|
||||||
|
window.location.href = 'index.html';
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Error loading apps:', err);
|
||||||
|
window.location.href = 'index.html';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
renderAppDetails() {
|
||||||
|
if (!this.appData) return;
|
||||||
|
|
||||||
|
// Update title
|
||||||
|
document.title = `${this.appData.name} - Crawl4AI Marketplace`;
|
||||||
|
|
||||||
|
// Hero image
|
||||||
|
const appImage = document.getElementById('app-image');
|
||||||
|
if (this.appData.image) {
|
||||||
|
appImage.style.backgroundImage = `url('${this.appData.image}')`;
|
||||||
|
appImage.innerHTML = '';
|
||||||
|
} else {
|
||||||
|
appImage.innerHTML = `[${this.appData.category || 'APP'}]`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic info
|
||||||
|
document.getElementById('app-name').textContent = this.appData.name;
|
||||||
|
document.getElementById('app-description').textContent = this.appData.description;
|
||||||
|
document.getElementById('app-type').textContent = this.appData.type || 'Open Source';
|
||||||
|
document.getElementById('app-category').textContent = this.appData.category;
|
||||||
|
document.getElementById('app-pricing').textContent = this.appData.pricing || 'Free';
|
||||||
|
|
||||||
|
// Badges
|
||||||
|
if (this.appData.featured) {
|
||||||
|
document.getElementById('app-featured').style.display = 'inline-block';
|
||||||
|
}
|
||||||
|
if (this.appData.sponsored) {
|
||||||
|
document.getElementById('app-sponsored').style.display = 'inline-block';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stats
|
||||||
|
const rating = this.appData.rating || 0;
|
||||||
|
const stars = '★'.repeat(Math.floor(rating)) + '☆'.repeat(5 - Math.floor(rating));
|
||||||
|
document.getElementById('app-rating').textContent = stars + ` ${rating}/5`;
|
||||||
|
document.getElementById('app-downloads').textContent = this.formatNumber(this.appData.downloads || 0);
|
||||||
|
|
||||||
|
// Action buttons
|
||||||
|
const websiteBtn = document.getElementById('app-website');
|
||||||
|
const githubBtn = document.getElementById('app-github');
|
||||||
|
|
||||||
|
if (this.appData.website_url) {
|
||||||
|
websiteBtn.href = this.appData.website_url;
|
||||||
|
} else {
|
||||||
|
websiteBtn.style.display = 'none';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.appData.github_url) {
|
||||||
|
githubBtn.href = this.appData.github_url;
|
||||||
|
} else {
|
||||||
|
githubBtn.style.display = 'none';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contact
|
||||||
|
document.getElementById('app-contact').textContent = this.appData.contact_email || 'Not available';
|
||||||
|
|
||||||
|
// Integration guide
|
||||||
|
this.renderIntegrationGuide();
|
||||||
|
}
|
||||||
|
|
||||||
|
renderIntegrationGuide() {
|
||||||
|
// Installation code
|
||||||
|
const installCode = document.getElementById('install-code');
|
||||||
|
if (this.appData.type === 'Open Source' && this.appData.github_url) {
|
||||||
|
installCode.textContent = `# Clone from GitHub
|
||||||
|
git clone ${this.appData.github_url}
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
pip install -r requirements.txt`;
|
||||||
|
} else if (this.appData.name.toLowerCase().includes('api')) {
|
||||||
|
installCode.textContent = `# Install via pip
|
||||||
|
pip install ${this.appData.slug}
|
||||||
|
|
||||||
|
# Or install from source
|
||||||
|
pip install git+${this.appData.github_url || 'https://github.com/example/repo'}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Usage code - customize based on category
|
||||||
|
const usageCode = document.getElementById('usage-code');
|
||||||
|
if (this.appData.category === 'Browser Automation') {
|
||||||
|
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
|
||||||
|
from ${this.appData.slug.replace(/-/g, '_')} import ${this.appData.name.replace(/\s+/g, '')}
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
# Initialize ${this.appData.name}
|
||||||
|
automation = ${this.appData.name.replace(/\s+/g, '')}()
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
browser_config=automation.config,
|
||||||
|
wait_for="css:body"
|
||||||
|
)
|
||||||
|
print(result.markdown)`;
|
||||||
|
} else if (this.appData.category === 'Proxy Services') {
|
||||||
|
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
|
||||||
|
import ${this.appData.slug.replace(/-/g, '_')}
|
||||||
|
|
||||||
|
# Configure proxy
|
||||||
|
proxy_config = {
|
||||||
|
"server": "${this.appData.website_url || 'https://proxy.example.com'}",
|
||||||
|
"username": "your_username",
|
||||||
|
"password": "your_password"
|
||||||
|
}
|
||||||
|
|
||||||
|
async with AsyncWebCrawler(proxy=proxy_config) as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
bypass_cache=True
|
||||||
|
)
|
||||||
|
print(result.status_code)`;
|
||||||
|
} else if (this.appData.category === 'LLM Integration') {
|
||||||
|
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
|
||||||
|
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||||
|
|
||||||
|
# Configure LLM extraction
|
||||||
|
strategy = LLMExtractionStrategy(
|
||||||
|
provider="${this.appData.name.toLowerCase().includes('gpt') ? 'openai' : 'anthropic'}",
|
||||||
|
api_key="your-api-key",
|
||||||
|
model="${this.appData.name.toLowerCase().includes('gpt') ? 'gpt-4' : 'claude-3'}",
|
||||||
|
instruction="Extract structured data"
|
||||||
|
)
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
extraction_strategy=strategy
|
||||||
|
)
|
||||||
|
print(result.extracted_content)`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Integration example
|
||||||
|
const integrationCode = document.getElementById('integration-code');
|
||||||
|
integrationCode.textContent = this.appData.integration_guide ||
|
||||||
|
`# Complete ${this.appData.name} Integration Example
|
||||||
|
|
||||||
|
from crawl4ai import AsyncWebCrawler
|
||||||
|
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||||
|
import json
|
||||||
|
|
||||||
|
async def crawl_with_${this.appData.slug.replace(/-/g, '_')}():
|
||||||
|
"""
|
||||||
|
Complete example showing how to use ${this.appData.name}
|
||||||
|
with Crawl4AI for production web scraping
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Define extraction schema
|
||||||
|
schema = {
|
||||||
|
"name": "ProductList",
|
||||||
|
"baseSelector": "div.product",
|
||||||
|
"fields": [
|
||||||
|
{"name": "title", "selector": "h2", "type": "text"},
|
||||||
|
{"name": "price", "selector": ".price", "type": "text"},
|
||||||
|
{"name": "image", "selector": "img", "type": "attribute", "attribute": "src"},
|
||||||
|
{"name": "link", "selector": "a", "type": "attribute", "attribute": "href"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Initialize crawler with ${this.appData.name}
|
||||||
|
async with AsyncWebCrawler(
|
||||||
|
browser_type="chromium",
|
||||||
|
headless=True,
|
||||||
|
verbose=True
|
||||||
|
) as crawler:
|
||||||
|
|
||||||
|
# Crawl with extraction
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com/products",
|
||||||
|
extraction_strategy=JsonCssExtractionStrategy(schema),
|
||||||
|
cache_mode="bypass",
|
||||||
|
wait_for="css:.product",
|
||||||
|
screenshot=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process results
|
||||||
|
if result.success:
|
||||||
|
products = json.loads(result.extracted_content)
|
||||||
|
print(f"Found {len(products)} products")
|
||||||
|
|
||||||
|
for product in products[:5]:
|
||||||
|
print(f"- {product['title']}: {product['price']}")
|
||||||
|
|
||||||
|
return products
|
||||||
|
|
||||||
|
# Run the crawler
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import asyncio
|
||||||
|
asyncio.run(crawl_with_${this.appData.slug.replace(/-/g, '_')}())`;
|
||||||
|
}
|
||||||
|
|
||||||
|
formatNumber(num) {
|
||||||
|
if (num >= 1000000) {
|
||||||
|
return (num / 1000000).toFixed(1) + 'M';
|
||||||
|
} else if (num >= 1000) {
|
||||||
|
return (num / 1000).toFixed(1) + 'K';
|
||||||
|
}
|
||||||
|
return num.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
setupEventListeners() {
|
||||||
|
// Tab switching
|
||||||
|
const tabs = document.querySelectorAll('.nav-tab');
|
||||||
|
tabs.forEach(tab => {
|
||||||
|
tab.addEventListener('click', () => {
|
||||||
|
// Update active tab
|
||||||
|
tabs.forEach(t => t.classList.remove('active'));
|
||||||
|
tab.classList.add('active');
|
||||||
|
|
||||||
|
// Show corresponding content
|
||||||
|
const tabName = tab.dataset.tab;
|
||||||
|
document.querySelectorAll('.tab-content').forEach(content => {
|
||||||
|
content.classList.remove('active');
|
||||||
|
});
|
||||||
|
document.getElementById(`${tabName}-tab`).classList.add('active');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Copy integration code
|
||||||
|
document.getElementById('copy-integration').addEventListener('click', () => {
|
||||||
|
const code = document.getElementById('integration-code').textContent;
|
||||||
|
navigator.clipboard.writeText(code).then(() => {
|
||||||
|
const btn = document.getElementById('copy-integration');
|
||||||
|
const originalText = btn.innerHTML;
|
||||||
|
btn.innerHTML = '<span>✓</span> Copied!';
|
||||||
|
setTimeout(() => {
|
||||||
|
btn.innerHTML = originalText;
|
||||||
|
}, 2000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Copy code buttons
|
||||||
|
document.querySelectorAll('.copy-btn').forEach(btn => {
|
||||||
|
btn.addEventListener('click', (e) => {
|
||||||
|
const codeBlock = e.target.closest('.code-block');
|
||||||
|
const code = codeBlock.querySelector('code').textContent;
|
||||||
|
navigator.clipboard.writeText(code).then(() => {
|
||||||
|
btn.textContent = 'Copied!';
|
||||||
|
setTimeout(() => {
|
||||||
|
btn.textContent = 'Copy';
|
||||||
|
}, 2000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadRelatedApps() {
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${API_BASE}/apps?category=${encodeURIComponent(this.appData.category)}&limit=4`);
|
||||||
|
const apps = await response.json();
|
||||||
|
|
||||||
|
const relatedApps = apps.filter(app => app.slug !== this.appSlug).slice(0, 3);
|
||||||
|
const grid = document.getElementById('related-apps-grid');
|
||||||
|
|
||||||
|
grid.innerHTML = relatedApps.map(app => `
|
||||||
|
<div class="related-app-card" onclick="window.location.href='app-detail.html?app=${app.slug || app.name.toLowerCase().replace(/\s+/g, '-')}'">
|
||||||
|
<h4>${app.name}</h4>
|
||||||
|
<p>${app.description.substring(0, 100)}...</p>
|
||||||
|
<div style="display: flex; justify-content: space-between; margin-top: 0.5rem; font-size: 0.75rem;">
|
||||||
|
<span style="color: var(--primary-cyan)">${app.type}</span>
|
||||||
|
<span style="color: var(--warning)">★ ${app.rating}/5</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error loading related apps:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bootstrap the detail page once the document has been parsed.
document.addEventListener('DOMContentLoaded', function initAppDetailPage() {
    new AppDetailPage();
});
|
||||||
147
docs/md_v2/marketplace/frontend/index.html
Normal file
147
docs/md_v2/marketplace/frontend/index.html
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
<!DOCTYPE html>
<!--
    Crawl4AI Marketplace landing page.
    Static shell only: the empty containers below (looked up by id) are
    expected to be filled in by marketplace.js.
-->
<html lang="en" data-theme="dark">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Marketplace - Crawl4AI</title>
    <link rel="stylesheet" href="marketplace.css">
</head>
<body>
    <div class="marketplace-container">
        <!-- Header -->
        <header class="marketplace-header">
            <div class="header-content">
                <div class="header-left">
                    <div class="logo-title">
                        <img src="../../assets/images/logo.png" alt="Crawl4AI" class="header-logo">
                        <h1>
                            <span class="ascii-border" aria-hidden="true">[</span>
                            Marketplace
                            <span class="ascii-border" aria-hidden="true">]</span>
                        </h1>
                    </div>
                    <p class="tagline">Tools, Integrations &amp; Resources for Web Crawling</p>
                </div>
                <div class="header-stats" id="stats">
                    <span class="stat-item">Apps: <span id="total-apps">--</span></span>
                    <span class="stat-item">Articles: <span id="total-articles">--</span></span>
                    <span class="stat-item">Downloads: <span id="total-downloads">--</span></span>
                </div>
            </div>
        </header>

        <!-- Search and Category Bar -->
        <div class="search-filter-bar">
            <div class="search-box">
                <!-- Decorative prompt glyph; hidden from assistive technology -->
                <span class="search-icon" aria-hidden="true">&gt;</span>
                <!-- aria-label gives the field an accessible name without changing the layout -->
                <input type="text" id="search-input" placeholder="Search apps, articles, tools..." aria-label="Search apps, articles, tools" />
                <kbd>/</kbd>
            </div>
            <div class="category-filter" id="category-filter">
                <button type="button" class="filter-btn active" data-category="all">All</button>
                <!-- Categories will be loaded here -->
            </div>
        </div>

        <!-- Magazine Grid Layout -->
        <main class="magazine-layout">
            <!-- Hero Featured Section -->
            <section class="hero-featured">
                <div id="featured-hero" class="featured-hero-card">
                    <!-- Large featured card with big image -->
                </div>
            </section>

            <!-- Secondary Featured -->
            <section class="secondary-featured">
                <div id="featured-secondary" class="featured-secondary-cards">
                    <!-- 2-3 medium featured cards with images -->
                </div>
            </section>

            <!-- Sponsored Section -->
            <section class="sponsored-section">
                <div class="section-label">SPONSORED</div>
                <div id="sponsored-content" class="sponsored-cards">
                    <!-- Sponsored content cards -->
                </div>
            </section>

            <!-- Main Content Grid -->
            <section class="main-content">
                <!-- Apps Column -->
                <div class="apps-column">
                    <div class="column-header">
                        <h2><span class="ascii-icon" aria-hidden="true">&gt;</span> Latest Apps</h2>
                        <select id="type-filter" class="mini-filter" aria-label="Filter apps by type">
                            <option value="">All</option>
                            <option value="Open Source">Open Source</option>
                            <option value="Paid">Paid</option>
                        </select>
                    </div>
                    <div id="apps-grid" class="apps-compact-grid">
                        <!-- Compact app cards -->
                    </div>
                </div>

                <!-- Articles Column -->
                <div class="articles-column">
                    <div class="column-header">
                        <h2><span class="ascii-icon" aria-hidden="true">&gt;</span> Latest Articles</h2>
                    </div>
                    <div id="articles-list" class="articles-compact-list">
                        <!-- Article items -->
                    </div>
                </div>

                <!-- Trending/Tools Column -->
                <div class="trending-column">
                    <div class="column-header">
                        <h2><span class="ascii-icon" aria-hidden="true">#</span> Trending</h2>
                    </div>
                    <div id="trending-list" class="trending-items">
                        <!-- Trending items -->
                    </div>

                    <div class="submit-box">
                        <h3><span class="ascii-icon" aria-hidden="true">+</span> Submit Your Tool</h3>
                        <p>Share your integration</p>
                        <a href="mailto:marketplace@crawl4ai.com" class="submit-btn">Submit →</a>
                    </div>
                </div>
            </section>

            <!-- More Apps Grid -->
            <section class="more-apps">
                <div class="section-header">
                    <h2><span class="ascii-icon" aria-hidden="true">&gt;</span> More Apps</h2>
                    <button type="button" id="load-more" class="load-more-btn">Load More ↓</button>
                </div>
                <div id="more-apps-grid" class="more-apps-grid">
                    <!-- Additional app cards -->
                </div>
            </section>
        </main>

        <!-- Footer -->
        <footer class="marketplace-footer">
            <div class="footer-content">
                <div class="footer-section">
                    <h3>About Marketplace</h3>
                    <p>Discover tools and integrations built by the Crawl4AI community.</p>
                </div>
                <div class="footer-section">
                    <h3>Become a Sponsor</h3>
                    <p>Reach developers building with Crawl4AI</p>
                    <a href="mailto:sponsors@crawl4ai.com" class="sponsor-btn">Learn More →</a>
                </div>
            </div>
            <div class="footer-bottom">
                <p>[ Crawl4AI Marketplace · Updated <span id="last-update">--</span> ]</p>
            </div>
        </footer>
    </div>

    <script src="marketplace.js"></script>
</body>
</html>
|
||||||
957
docs/md_v2/marketplace/frontend/marketplace.css
Normal file
957
docs/md_v2/marketplace/frontend/marketplace.css
Normal file
@@ -0,0 +1,957 @@
|
|||||||
|
/* Marketplace CSS - Magazine Style Terminal Theme */
|
||||||
|
@import url('../../assets/styles.css');
|
||||||
|
|
||||||
|
:root {
|
||||||
|
--primary-cyan: #50ffff;
|
||||||
|
--primary-teal: #09b5a5;
|
||||||
|
--accent-pink: #f380f5;
|
||||||
|
--bg-dark: #070708;
|
||||||
|
--bg-secondary: #1a1a1a;
|
||||||
|
--bg-tertiary: #3f3f44;
|
||||||
|
--text-primary: #e8e9ed;
|
||||||
|
--text-secondary: #d5cec0;
|
||||||
|
--text-tertiary: #a3abba;
|
||||||
|
--border-color: #3f3f44;
|
||||||
|
--success: #50ff50;
|
||||||
|
--error: #ff3c74;
|
||||||
|
--warning: #f59e0b;
|
||||||
|
}
|
||||||
|
|
||||||
|
* {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: 'Dank Mono', Monaco, monospace;
|
||||||
|
background: var(--bg-dark);
|
||||||
|
color: var(--text-primary);
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Global link styles */
|
||||||
|
a {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: color 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
a:hover {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.marketplace-container {
|
||||||
|
min-height: 100vh;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Header */
|
||||||
|
.marketplace-header {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
padding: 1.5rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-content {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.logo-title {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-logo {
|
||||||
|
height: 40px;
|
||||||
|
width: auto;
|
||||||
|
filter: brightness(1.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
.marketplace-header h1 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ascii-border {
|
||||||
|
color: var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tagline {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-top: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-stats {
|
||||||
|
display: flex;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-item {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.stat-item span {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Search and Filter Bar */
|
||||||
|
.search-filter-bar {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 1.5rem auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-box {
|
||||||
|
flex: 1;
|
||||||
|
max-width: 500px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
padding: 0.75rem 1rem;
|
||||||
|
transition: border-color 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-box:focus-within {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-icon {
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-right: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-input {
|
||||||
|
flex: 1;
|
||||||
|
background: transparent;
|
||||||
|
border: none;
|
||||||
|
color: var(--text-primary);
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
outline: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-box kbd {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
padding: 0.2rem 0.5rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.category-filter {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-btn {
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-btn.active {
|
||||||
|
background: var(--primary-cyan);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Magazine Layout */
|
||||||
|
.magazine-layout {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 0 2rem 4rem;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hero Featured Section */
|
||||||
|
.hero-featured {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-featured::before {
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
top: -20px;
|
||||||
|
left: -20px;
|
||||||
|
right: -20px;
|
||||||
|
bottom: -20px;
|
||||||
|
background: radial-gradient(ellipse at center, rgba(80, 255, 255, 0.05), transparent 70%);
|
||||||
|
pointer-events: none;
|
||||||
|
z-index: -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-hero-card {
|
||||||
|
background: linear-gradient(135deg, #1a1a2e, #0f0f1e);
|
||||||
|
border: 2px solid var(--primary-cyan);
|
||||||
|
box-shadow: 0 0 30px rgba(80, 255, 255, 0.15),
|
||||||
|
inset 0 0 20px rgba(80, 255, 255, 0.05);
|
||||||
|
height: 380px;
|
||||||
|
position: relative;
|
||||||
|
overflow: hidden;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-hero-card:hover {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
box-shadow: 0 0 40px rgba(243, 128, 245, 0.2),
|
||||||
|
inset 0 0 30px rgba(243, 128, 245, 0.05);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-image {
|
||||||
|
width: 100%;
|
||||||
|
height: 240px;
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05));
|
||||||
|
background-size: cover;
|
||||||
|
background-position: center;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
font-size: 3rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
flex-shrink: 0;
|
||||||
|
position: relative;
|
||||||
|
filter: brightness(1.1) contrast(1.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-image::after {
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
bottom: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
height: 60%;
|
||||||
|
background: linear-gradient(to top, rgba(10, 10, 20, 0.95), transparent);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-content {
|
||||||
|
padding: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.3rem 0.6rem;
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
color: var(--bg-dark);
|
||||||
|
font-size: 0.7rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
font-weight: 600;
|
||||||
|
box-shadow: 0 2px 10px rgba(80, 255, 255, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-title {
|
||||||
|
font-size: 1.6rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0.5rem 0;
|
||||||
|
text-shadow: 0 0 20px rgba(80, 255, 255, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-description {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-meta {
|
||||||
|
display: flex;
|
||||||
|
gap: 1.5rem;
|
||||||
|
margin-top: 1rem;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-meta span {
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-meta span:first-child {
|
||||||
|
color: var(--warning);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Secondary Featured */
|
||||||
|
.secondary-featured {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
height: 380px;
|
||||||
|
display: flex;
|
||||||
|
align-items: stretch;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-secondary-cards {
|
||||||
|
width: 100%;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.75rem;
|
||||||
|
justify-content: space-between;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-card {
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.03), rgba(243, 128, 245, 0.02));
|
||||||
|
border: 1px solid rgba(80, 255, 255, 0.3);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
display: flex;
|
||||||
|
overflow: hidden;
|
||||||
|
height: calc((380px - 1.5rem) / 3);
|
||||||
|
flex: 1;
|
||||||
|
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-card:hover {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
background: linear-gradient(135deg, rgba(243, 128, 245, 0.05), rgba(80, 255, 255, 0.03));
|
||||||
|
box-shadow: 0 4px 15px rgba(243, 128, 245, 0.2);
|
||||||
|
transform: translateX(-3px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-image {
|
||||||
|
width: 120px;
|
||||||
|
background: linear-gradient(135deg, var(--bg-tertiary), var(--bg-secondary));
|
||||||
|
background-size: cover;
|
||||||
|
background-position: center;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-content {
|
||||||
|
flex: 1;
|
||||||
|
padding: 1rem;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
justify-content: space-between;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-title {
|
||||||
|
font-size: 1rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Description text on secondary featured cards, truncated to two lines. */
.secondary-desc {
    font-size: 0.75rem;
    color: var(--text-secondary);
    /* The prefixed trio below is the widely supported multi-line clamp. */
    display: -webkit-box;
    -webkit-line-clamp: 2;
    -webkit-box-orient: vertical;
    /* Standard property, declared alongside the prefixed one. */
    line-clamp: 2;
    overflow: hidden;
}
|
||||||
|
|
||||||
|
.secondary-meta {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-meta span:last-child {
|
||||||
|
color: var(--warning);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sponsored Section */
|
||||||
|
.sponsored-section {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--warning);
|
||||||
|
padding: 1rem;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-label {
|
||||||
|
position: absolute;
|
||||||
|
top: -0.5rem;
|
||||||
|
left: 1rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
padding: 0 0.5rem;
|
||||||
|
color: var(--warning);
|
||||||
|
font-size: 0.65rem;
|
||||||
|
letter-spacing: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsored-cards {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card {
|
||||||
|
padding: 1rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card h4 {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.85rem;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card a {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card a:hover {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Main Content Grid */
|
||||||
|
.main-content {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Column Headers */
|
||||||
|
.column-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.column-header h2 {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.mini-filter {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ascii-icon {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Apps Column */
|
||||||
|
.apps-compact-grid {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
border-left: 3px solid var(--border-color);
|
||||||
|
padding: 0.75rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
border-left-color: var(--accent-pink);
|
||||||
|
transform: translateX(2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-header span:first-child {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-header span:last-child {
|
||||||
|
color: var(--warning);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-title {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Description text on compact app cards, truncated to two lines. */
.app-compact-desc {
    font-size: 0.75rem;
    color: var(--text-secondary);
    /* The prefixed trio below is the widely supported multi-line clamp. */
    display: -webkit-box;
    -webkit-line-clamp: 2;
    -webkit-box-orient: vertical;
    /* Standard property, declared alongside the prefixed one. */
    line-clamp: 2;
    overflow: hidden;
}
|
||||||
|
|
||||||
|
/* Articles Column */
|
||||||
|
.articles-compact-list {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-compact {
|
||||||
|
border-left: 2px solid var(--border-color);
|
||||||
|
padding-left: 1rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-compact:hover {
|
||||||
|
border-left-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-meta {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-meta span:first-child {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-title {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-author {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Trending Column */
|
||||||
|
.trending-items {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-item:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-rank {
|
||||||
|
font-size: 1.2rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
width: 2rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-info {
|
||||||
|
flex: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-name {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-stats {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Submit Box */
|
||||||
|
.submit-box {
|
||||||
|
margin-top: 1.5rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
padding: 1rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-box h3 {
|
||||||
|
font-size: 1rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-box p {
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-btn {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-btn:hover {
|
||||||
|
background: var(--primary-cyan);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* More Apps Section */
|
||||||
|
.more-apps {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
margin-top: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.more-apps-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.load-more-btn {
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
padding: 0.5rem 1.5rem;
|
||||||
|
font-family: inherit;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.load-more-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Footer */
|
||||||
|
.marketplace-footer {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
margin-top: 4rem;
|
||||||
|
padding: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-content {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 1fr;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-section h3 {
|
||||||
|
font-size: 1rem;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-section p {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-btn {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-btn:hover {
|
||||||
|
background: var(--primary-cyan);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-bottom {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto 0;
|
||||||
|
padding: 1rem 2rem 0;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Modal */
|
||||||
|
.modal {
|
||||||
|
position: fixed;
|
||||||
|
top: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
bottom: 0;
|
||||||
|
background: rgba(0, 0, 0, 0.8);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
z-index: 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal.hidden {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-content {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
max-width: 800px;
|
||||||
|
width: 90%;
|
||||||
|
max-height: 80vh;
|
||||||
|
overflow-y: auto;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-close {
|
||||||
|
position: absolute;
|
||||||
|
top: 1rem;
|
||||||
|
right: 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 1.2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-close:hover {
|
||||||
|
border-color: var(--error);
|
||||||
|
color: var(--error);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-detail {
|
||||||
|
padding: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-detail h2 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Loading */
|
||||||
|
.loading {
|
||||||
|
text-align: center;
|
||||||
|
padding: 2rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.no-results {
|
||||||
|
text-align: center;
|
||||||
|
padding: 2rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive - Tablet */
@media (min-width: 768px) {
    /* Page grid and the main content grid both switch to two equal columns. */
    .magazine-layout,
    .main-content {
        grid-template-columns: repeat(2, 1fr);
    }

    /* Every top-level section still stretches across the full grid width. */
    .hero-featured,
    .secondary-featured,
    .sponsored-section,
    .main-content {
        grid-column: 1 / -1;
    }
}
|
||||||
|
|
||||||
|
/* Responsive - Desktop */
@media (min-width: 1024px) {
    /* Three equal columns for the page grid and the main content grid. */
    .magazine-layout,
    .main-content {
        grid-template-columns: repeat(3, 1fr);
    }

    /* First row: hero occupies columns 1-2, secondary featured column 3. */
    .hero-featured {
        grid-row: 1;
        grid-column: 1 / 3;
    }

    .secondary-featured {
        grid-row: 1;
        grid-column: 3 / 4;
    }

    .featured-secondary-cards {
        flex-direction: column;
    }

    /* Sponsored strip and main content span the whole grid. */
    .sponsored-section,
    .main-content {
        grid-column: 1 / -1;
    }
}
|
||||||
|
|
||||||
|
/* Responsive - Wide Desktop */
@media (min-width: 1400px) {
    /* Four equal columns for the page grid and the main content grid. */
    .magazine-layout,
    .main-content {
        grid-template-columns: repeat(4, 1fr);
    }

    /* Hero keeps columns 1-2; secondary featured widens to columns 3-4 on row 1. */
    .hero-featured {
        grid-column: 1 / 3;
    }

    .secondary-featured {
        grid-row: 1;
        grid-column: 3 / 5;
    }

    .featured-secondary-cards {
        grid-template-columns: repeat(2, 1fr);
    }

    /* The apps column takes two of the four main-content tracks. */
    .apps-column {
        grid-column: span 2;
    }

    /* As many >=250px columns as fit, sharing leftover space equally. */
    .more-apps-grid {
        grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
    }
}
|
||||||
|
|
||||||
|
/* Responsive - Ultra Wide Desktop (for coders with wide monitors) */
@media (min-width: 1800px) {
    .magazine-layout {
        grid-template-columns: repeat(5, 1fr);
    }

    /* Hero stays on columns 1-2; secondary featured takes columns 3-5. */
    .hero-featured {
        grid-column: 1 / 3;
    }

    .secondary-featured {
        grid-column: 3 / 6;
    }

    .featured-secondary-cards {
        grid-template-columns: repeat(3, 1fr);
    }

    /* Sponsored strip spans the grid and lays its cards out five across. */
    .sponsored-section {
        grid-column: 1 / -1;
    }

    .sponsored-cards {
        grid-template-columns: repeat(5, 1fr);
    }

    /* Main content: five tracks, with apps and articles taking two each. */
    .main-content {
        grid-template-columns: repeat(5, 1fr);
    }

    .apps-column,
    .articles-column {
        grid-column: span 2;
    }

    /* As many >=300px columns as fit, sharing leftover space equally. */
    .more-apps-grid {
        grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
    }
}
|
||||||
|
|
||||||
|
/* Responsive - Mobile */
/*
 * Upper bound is 767.98px rather than 767px: viewport widths can be fractional
 * (zoom, high-DPI scaling), and a width such as 767.5px would otherwise match
 * neither this block nor the `min-width: 768px` tablet block.
 */
@media (max-width: 767.98px) {
    /* Stack the header pieces vertically. */
    .header-content {
        flex-direction: column;
        gap: 1rem;
    }

    /* Stack search box and category buttons, each stretched to full width. */
    .search-filter-bar {
        flex-direction: column;
        align-items: stretch;
    }

    .search-box {
        max-width: none;
    }

    /* Tighter horizontal and bottom padding on small screens. */
    .magazine-layout {
        padding: 0 1rem 2rem;
    }

    .footer-content {
        grid-template-columns: 1fr;
    }

    /* Secondary cards stack their image above the text. */
    .secondary-card {
        flex-direction: column;
    }

    .secondary-image {
        width: 100%;
        height: 150px;
    }
}
|
||||||
395
docs/md_v2/marketplace/frontend/marketplace.js
Normal file
395
docs/md_v2/marketplace/frontend/marketplace.js
Normal file
@@ -0,0 +1,395 @@
|
|||||||
|
// Marketplace JS - Magazine Layout

// Path prefix prepended to every marketplace API endpoint.
const API_BASE = '/marketplace/api';

// Default lifetime of a cached response: one hour, expressed in milliseconds.
const CACHE_TTL = 60 * 60 * 1000;
|
||||||
|
|
||||||
|
/**
 * Best-effort cache on top of localStorage with a per-entry expiry time.
 *
 * Entries are stored as JSON `{ value, expires }` under a common key prefix.
 * Storage problems (corrupted JSON, quota exceeded, storage disabled) never
 * propagate to the caller: a failed read is reported as a miss and a failed
 * write is dropped with a console warning.
 */
class MarketplaceCache {
    constructor() {
        // Prefix that namespaces this cache's entries inside localStorage.
        this.prefix = 'c4ai_market_';
    }

    /**
     * Read a cached value.
     *
     * @param {string} key - Cache key (without the prefix).
     * @returns {*} The stored value, or null when the entry is missing,
     *     expired, malformed, or storage cannot be read.
     */
    get(key) {
        const storageKey = this.prefix + key;
        try {
            const item = localStorage.getItem(storageKey);
            if (!item) return null;

            const data = JSON.parse(item);
            const isLive = data !== null
                && typeof data === 'object'
                && typeof data.expires === 'number'
                && Date.now() <= data.expires;
            if (!isLive) {
                // Expired or structurally invalid: evict so it is not re-parsed.
                localStorage.removeItem(storageKey);
                return null;
            }
            return data.value;
        } catch (error) {
            // JSON.parse failed or storage is inaccessible; treat as a miss.
            this._discard(storageKey);
            return null;
        }
    }

    /**
     * Store a value with an expiry.
     *
     * @param {string} key - Cache key (without the prefix).
     * @param {*} value - JSON-serializable value to store.
     * @param {number} [ttl=CACHE_TTL] - Lifetime in milliseconds.
     */
    set(key, value, ttl = CACHE_TTL) {
        const data = {
            value: value,
            expires: Date.now() + ttl
        };
        try {
            localStorage.setItem(this.prefix + key, JSON.stringify(data));
        } catch (error) {
            // Quota exceeded / storage disabled: caching is optional, keep going.
            console.warn('Marketplace cache write skipped:', error);
        }
    }

    /** Remove every entry whose key starts with this cache's prefix. */
    clear() {
        try {
            // Collect first: removing while indexing would shift later keys.
            const doomed = [];
            for (let i = 0; i < localStorage.length; i++) {
                const name = localStorage.key(i);
                if (name !== null && name.startsWith(this.prefix)) {
                    doomed.push(name);
                }
            }
            doomed.forEach(name => localStorage.removeItem(name));
        } catch (error) {
            console.warn('Marketplace cache clear failed:', error);
        }
    }

    /** Remove one raw storage key, ignoring storage errors. */
    _discard(storageKey) {
        try {
            localStorage.removeItem(storageKey);
        } catch (error) {
            // Storage unavailable; there is nothing to clean up.
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Thin client for the marketplace HTTP API with optional response caching.
 *
 * Every request resolves to the parsed JSON body, or to null when the
 * request fails (network error, non-2xx status, invalid JSON).
 */
class MarketplaceAPI {
    constructor() {
        this.cache = new MarketplaceCache();
        this.searchTimeout = null;
    }

    /**
     * GET an endpoint below API_BASE.
     *
     * @param {string} endpoint - Path plus optional query string, e.g. '/apps?limit=8'.
     * @param {boolean} [useCache=true] - When false the cache is neither read
     *     nor written, so one-off requests do not accumulate in storage.
     * @returns {Promise<*>} Parsed JSON, or null on failure.
     */
    async fetch(endpoint, useCache = true) {
        // Cache keys may only contain word characters.
        const cacheKey = endpoint.replace(/[^\w]/g, '_');

        if (useCache) {
            const cached = this.cache.get(cacheKey);
            if (cached !== null && cached !== undefined) return cached;
        }

        let data;
        try {
            // Inside this method the bare name `fetch` is the global function.
            const response = await fetch(`${API_BASE}${endpoint}`);
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            data = await response.json();
        } catch (error) {
            console.error('API Error:', error);
            return null;
        }

        if (useCache) {
            // Kept outside the request try-block: a failed cache write must
            // not turn a successful response into null.
            try {
                this.cache.set(cacheKey, data);
            } catch (error) {
                console.warn('Cache write failed:', error);
            }
        }
        return data;
    }

    /** Fetch marketplace statistics. */
    async getStats() {
        return this.fetch('/stats');
    }

    /** Fetch the category list. */
    async getCategories() {
        return this.fetch('/categories');
    }

    /**
     * Fetch apps.
     * @param {Object} [params={}] - Query parameters; undefined, null and
     *     empty-string values are omitted.
     */
    async getApps(params = {}) {
        return this.fetch(this._withQuery('/apps', params));
    }

    /**
     * Fetch articles.
     * @param {Object} [params={}] - Query parameters; undefined, null and
     *     empty-string values are omitted.
     */
    async getArticles(params = {}) {
        return this.fetch(this._withQuery('/articles', params));
    }

    /** Fetch the sponsor list. */
    async getSponsors() {
        return this.fetch('/sponsors');
    }

    /**
     * Search the marketplace; results are never cached.
     *
     * @param {string} query - Search text; surrounding whitespace is ignored.
     * @returns {Promise<Object|null>} `{}` when the trimmed query is shorter
     *     than two characters, otherwise the API response (null on failure).
     */
    async search(query) {
        const term = typeof query === 'string' ? query.trim() : '';
        if (term.length < 2) return {};
        return this.fetch(`/search?q=${encodeURIComponent(term)}`, false);
    }

    /** Append `params` to `path` as a query string, skipping empty values. */
    _withQuery(path, params) {
        const search = new URLSearchParams();
        Object.keys(params || {}).forEach(name => {
            const value = params[name];
            if (value !== undefined && value !== null && value !== '') {
                search.append(name, value);
            }
        });
        const query = search.toString();
        return query ? `${path}?${query}` : path;
    }
}
|
||||||
|
|
||||||
|
/**
 * Controller for the marketplace page: loads data through MarketplaceAPI and
 * renders it into the elements identified by id in the page markup.
 *
 * All API-provided text is inserted as DOM text nodes (never parsed as HTML)
 * and click handlers are attached as listeners, so values containing quotes
 * or markup cannot break out of the page structure.
 */
class MarketplaceUI {
    constructor() {
        this.api = new MarketplaceAPI();
        this.currentCategory = 'all';
        this.currentType = '';
        this.searchTimeout = null;
        // Incremented per search so that stale responses can be discarded.
        this.searchSeq = 0;
        // True while a "load more" request is in flight.
        this.loadingMore = false;
        // Size of the "latest apps" column and of one "more apps" page.
        this.latestAppsLimit = 8;
        this.moreAppsPageSize = 12;
        // Offset of the next app to request for the "more apps" grid.
        this.loadedApps = this.latestAppsLimit;
        this.init();
    }

    /**
     * Wire up the controls, then load every page section.
     * Sections load independently; a failure in one is logged and does not
     * prevent the others from rendering.
     */
    async init() {
        try {
            this.setupEventListeners();
        } catch (error) {
            console.error('Marketplace: setupEventListeners failed', error);
        }

        const loaders = [
            'loadStats',
            'loadCategories',
            'loadFeaturedContent',
            'loadSponsors',
            'loadMainContent'
        ];
        await Promise.all(loaders.map(async (name) => {
            try {
                await this[name]();
            } catch (error) {
                console.error(`Marketplace: ${name} failed`, error);
            }
        }));
    }

    /** Fill the header counters and the footer "updated" date. */
    async loadStats() {
        const stats = await this.api.getStats();
        if (!stats) return;

        this._setText('total-apps', stats.total_apps || '0');
        this._setText('total-articles', stats.total_articles || '0');
        this._setText('total-downloads', stats.total_downloads || '0');
        this._setText('last-update', new Date().toLocaleDateString());
    }

    /**
     * Append one filter button per category.
     * Clicks are handled by the delegated listener on #category-filter.
     */
    async loadCategories() {
        const categories = await this.api.getCategories();
        if (!Array.isArray(categories)) return;

        const filter = document.getElementById('category-filter');
        if (!filter) return;
        categories.forEach(cat => {
            const btn = this._el('button', 'filter-btn', cat.name);
            btn.type = 'button';
            btn.dataset.category = cat.slug;
            filter.appendChild(btn);
        });
    }

    /** Render the hero card (first featured app) and up to three secondary cards. */
    async loadFeaturedContent() {
        const featured = await this.api.getApps({ featured: true, limit: 4 });
        if (!this._hasItems(featured)) return;

        // Hero card (first featured)
        const hero = featured[0];
        const heroCard = document.getElementById('featured-hero');
        if (hero && heroCard) {
            heroCard.innerHTML = '';
            heroCard.appendChild(this._imageBlock('hero-image', hero));
            heroCard.appendChild(this._el('div', 'hero-content',
                this._el('span', 'hero-badge', hero.type || 'PAID'),
                this._el('h2', 'hero-title', hero.name),
                this._el('p', 'hero-description', hero.description),
                this._el('div', 'hero-meta',
                    this._el('span', '', `★ ${hero.rating || 0}/5`),
                    ' ',
                    this._el('span', '', `${hero.downloads || 0} downloads`))));
            heroCard.onclick = () => this.showAppDetail(hero);
        }

        // Secondary featured cards
        const cards = featured.slice(1, 4).map(app => {
            const card = this._el('div', 'secondary-card',
                this._imageBlock('secondary-image', app),
                this._el('div', 'secondary-content',
                    this._el('h3', 'secondary-title', app.name),
                    this._el('p', 'secondary-desc', this._truncate(app.description, 100)),
                    this._el('div', 'secondary-meta',
                        this._el('span', '', app.type || 'Open Source'),
                        ' · ',
                        this._el('span', '', `★ ${app.rating || 0}/5`))));
            card.onclick = () => this.showAppDetail(app);
            return card;
        });
        this._fill('featured-secondary', cards);
    }

    /** Render up to five sponsor cards, or a "become a sponsor" placeholder. */
    async loadSponsors() {
        const sponsors = await this.api.getSponsors();
        const cards = this._hasItems(sponsors)
            ? sponsors.slice(0, 5).map(sponsor => this._createSponsorCard(sponsor))
            : [this._createSponsorPlaceholder()];
        this._fill('sponsored-content', cards);
    }

    /**
     * Render the apps column, articles column, trending list and the first
     * page of the "more apps" grid.
     *
     * NOTE(review): currentCategory and currentType are recorded by the
     * filter controls but are not sent with these requests, so the filters
     * currently reload unfiltered data. The API's filter parameter names are
     * not visible from this file; confirm them before wiring the filters in.
     */
    async loadMainContent() {
        // Load apps column
        const apps = await this.api.getApps({ limit: this.latestAppsLimit });
        if (this._hasItems(apps)) {
            this._fill('apps-grid', apps.map(app => this._createAppCard(app, true)));
        }

        // Load articles column
        const articles = await this.api.getArticles({ limit: 6 });
        if (this._hasItems(articles)) {
            this._fill('articles-list', articles.map(article => this._createArticleCard(article)));
        }

        // Load trending (top five of the apps already fetched)
        if (this._hasItems(apps)) {
            const items = apps.slice(0, 5).map((app, i) => this._createTrendingItem(app, i + 1));
            this._fill('trending-list', items);
        }

        // Load more apps grid: starts right after the latest-apps column.
        const firstOffset = this.latestAppsLimit;
        const moreApps = await this.api.getApps({ offset: firstOffset, limit: this.moreAppsPageSize });
        if (this._hasItems(moreApps)) {
            this._fill('more-apps-grid', moreApps.map(app => this._createAppCard(app, false)));
            // Next "load more" continues exactly where this page ended.
            this.loadedApps = firstOffset + moreApps.length;
        }
    }

    /** Attach listeners for search, keyboard shortcuts, filters and "load more". */
    setupEventListeners() {
        // Search (debounced by 300 ms)
        const searchInput = document.getElementById('search-input');
        if (searchInput) {
            searchInput.addEventListener('input', (e) => {
                clearTimeout(this.searchTimeout);
                this.searchTimeout = setTimeout(() => this.search(e.target.value), 300);
            });

            // Keyboard shortcuts: "/" focuses the search box, Escape clears it.
            document.addEventListener('keydown', (e) => {
                const active = document.activeElement;
                const inSearch = searchInput.contains(active);
                if (e.key === '/' && !inSearch && !this._isTypingTarget(active)) {
                    e.preventDefault();
                    searchInput.focus();
                }
                if (e.key === 'Escape' && inSearch) {
                    const hadQuery = searchInput.value !== '';
                    searchInput.blur();
                    searchInput.value = '';
                    clearTimeout(this.searchTimeout);
                    // Assigning .value fires no input event, so restore the
                    // default listing explicitly.
                    if (hadQuery) this.search('');
                }
            });
        }

        // Category filter: one delegated listener covers the static "All"
        // button and the buttons added later by loadCategories().
        const categoryFilter = document.getElementById('category-filter');
        if (categoryFilter) {
            categoryFilter.addEventListener('click', (e) => {
                const btn = e.target.closest('.filter-btn');
                if (btn && categoryFilter.contains(btn)) {
                    this.filterByCategory(btn.dataset.category);
                }
            });
        }

        // Type filter
        const typeFilter = document.getElementById('type-filter');
        if (typeFilter) {
            typeFilter.addEventListener('change', (e) => {
                this.currentType = e.target.value;
                this.loadMainContent();
            });
        }

        // Load more
        const loadMore = document.getElementById('load-more');
        if (loadMore) {
            loadMore.addEventListener('click', () => this.loadMoreApps());
        }
    }

    /**
     * Mark the chosen category button active and reload the main content.
     * @param {string} category - Category slug, or 'all'.
     */
    async filterByCategory(category) {
        document.querySelectorAll('.filter-btn').forEach(btn => {
            btn.classList.toggle('active', btn.dataset.category === category);
        });

        this.currentCategory = category;
        await this.loadMainContent();
    }

    /**
     * Show search results in the apps and articles columns.
     *
     * An empty query restores the default listing; a query shorter than two
     * characters leaves the page unchanged. A column with no matches shows a
     * "no results" placeholder instead of keeping its previous content.
     *
     * @param {string} query - Raw text from the search box.
     */
    async search(query) {
        const term = (query || '').trim();
        if (!term) {
            this.searchSeq += 1; // invalidate any search still in flight
            await this.loadMainContent();
            return;
        }
        if (term.length < 2) return;

        this.searchSeq += 1;
        const seq = this.searchSeq;
        const results = await this.api.search(term);
        // Drop failed responses and responses overtaken by a newer query.
        if (!results || seq !== this.searchSeq) return;

        const appCards = this._hasItems(results.apps)
            ? results.apps.map(app => this._createAppCard(app, true))
            : [this._el('div', 'no-results', 'No apps found')];
        this._fill('apps-grid', appCards);

        const articleCards = this._hasItems(results.articles)
            ? results.articles.map(article => this._createArticleCard(article))
            : [this._el('div', 'no-results', 'No articles found')];
        this._fill('articles-list', articleCards);
    }

    /** Append the next page of apps to the "more apps" grid. */
    async loadMoreApps() {
        if (this.loadingMore) return; // ignore clicks while a page is loading
        this.loadingMore = true;
        try {
            const moreApps = await this.api.getApps({
                offset: this.loadedApps,
                limit: this.moreAppsPageSize
            });
            if (!this._hasItems(moreApps)) return;

            const moreGrid = document.getElementById('more-apps-grid');
            if (!moreGrid) return;
            moreApps.forEach(app => moreGrid.appendChild(this._createAppCard(app, false)));
            this.loadedApps += moreApps.length;
        } finally {
            this.loadingMore = false;
        }
    }

    /**
     * Navigate to the detail page of an app.
     * @param {Object} app - App record; `slug` is used when present, otherwise
     *     a slug is derived from `name` (lower-cased, whitespace to hyphens).
     */
    showAppDetail(app) {
        if (!app) return;
        const slug = app.slug || String(app.name || '').toLowerCase().replace(/\s+/g, '-');
        window.location.href = `app-detail.html?app=${encodeURIComponent(slug)}`;
    }

    /**
     * Placeholder for an article detail view; currently only logs the id.
     * @param {string} articleId
     */
    showArticle(articleId) {
        // Could create article detail page similarly
        console.log('Show article:', articleId);
    }

    // ----- private rendering helpers -----

    /** True when `list` is a non-empty array. */
    _hasItems(list) {
        return Array.isArray(list) && list.length > 0;
    }

    /** Return `text` as a string, cut to `maxLength` with "..." only if it was cut. */
    _truncate(text, maxLength) {
        const value = text === null || text === undefined ? '' : String(text);
        return value.length > maxLength ? `${value.substring(0, maxLength)}...` : value;
    }

    /** True when `node` is an element in which the user types text. */
    _isTypingTarget(node) {
        if (!node || !node.tagName) return false;
        return node.isContentEditable === true
            || ['INPUT', 'TEXTAREA', 'SELECT'].indexOf(node.tagName) !== -1;
    }

    /** Set the text of the element with the given id, if it exists. */
    _setText(id, text) {
        const node = document.getElementById(id);
        if (node) node.textContent = String(text);
    }

    /**
     * Create an element; children may be nodes or plain values, and plain
     * values become text nodes. null/undefined children are skipped.
     */
    _el(tag, className, ...children) {
        const node = document.createElement(tag);
        if (className) node.className = className;
        children.forEach(child => {
            if (child === null || child === undefined) return;
            node.appendChild(child instanceof Node ? child : document.createTextNode(String(child)));
        });
        return node;
    }

    /** Replace the children of the element with the given id; no-op if absent. */
    _fill(containerId, nodes) {
        const container = document.getElementById(containerId);
        if (!container) return;
        container.innerHTML = '';
        nodes.forEach(node => container.appendChild(node));
    }

    /** Image area: background image when the app has one, else a "[CATEGORY]" label. */
    _imageBlock(className, app) {
        const block = this._el('div', className);
        const imageUrl = app.image || '';
        if (imageUrl) {
            // Escape quotes/backslashes so the URL stays inside the CSS string.
            const quoted = String(imageUrl).replace(/["\\]/g, '\\$&');
            block.style.backgroundImage = `url("${quoted}")`;
        } else {
            block.textContent = `[${app.category || 'APP'}]`;
        }
        return block;
    }

    /**
     * Compact app card. `detailed` cards show rating and description;
     * the others show the app type and no description.
     */
    _createAppCard(app, detailed) {
        const badge = detailed ? `★ ${app.rating || 0}/5` : app.type;
        const card = this._el('div', 'app-compact',
            this._el('div', 'app-compact-header',
                this._el('span', '', app.category),
                ' ',
                this._el('span', '', badge)),
            this._el('div', 'app-compact-title', app.name));
        if (detailed) {
            card.appendChild(this._el('div', 'app-compact-desc', app.description));
        }
        card.addEventListener('click', () => this.showAppDetail(app));
        return card;
    }

    /** Compact article entry. */
    _createArticleCard(article) {
        const published = new Date(article.published_at).toLocaleDateString();
        const card = this._el('div', 'article-compact',
            this._el('div', 'article-meta',
                this._el('span', '', article.category),
                ' · ',
                this._el('span', '', published)),
            this._el('div', 'article-title', article.title),
            this._el('div', 'article-author', `by ${article.author}`));
        card.addEventListener('click', () => this.showArticle(String(article.id)));
        return card;
    }

    /** Trending list row with a 1-based rank. */
    _createTrendingItem(app, rank) {
        const item = this._el('div', 'trending-item',
            this._el('div', 'trending-rank', rank),
            this._el('div', 'trending-info',
                this._el('div', 'trending-name', app.name),
                this._el('div', 'trending-stats', `${app.downloads || 0} downloads`)));
        item.addEventListener('click', () => this.showAppDetail(app));
        return item;
    }

    /** Sponsor card; the link is only made clickable for http(s) URLs. */
    _createSponsorCard(sponsor) {
        const link = this._el('a', '', 'Learn More →');
        const href = this._safeHttpUrl(sponsor.landing_url);
        if (href) {
            link.href = href;
            link.target = '_blank';
            // Keep the opened page from reaching back through window.opener.
            link.rel = 'noopener noreferrer';
        }
        return this._el('div', 'sponsor-card',
            this._el('h4', '', sponsor.company_name),
            this._el('p', '', `${sponsor.tier} Sponsor - Premium Solutions`),
            link);
    }

    /** Card shown when there are no sponsors. */
    _createSponsorPlaceholder() {
        const link = this._el('a', '', 'Contact Us →');
        link.href = 'mailto:sponsors@crawl4ai.com';
        return this._el('div', 'sponsor-card',
            this._el('h4', '', 'Become a Sponsor'),
            this._el('p', '', 'Reach thousands of developers using Crawl4AI'),
            link);
    }

    /** Return the absolute form of an http(s) URL, or null for anything else. */
    _safeHttpUrl(raw) {
        if (!raw) return null;
        try {
            const parsed = new URL(String(raw), window.location.href);
            return /^https?:$/.test(parsed.protocol) ? parsed.href : null;
        } catch (error) {
            return null;
        }
    }
}
|
||||||
|
|
||||||
|
// Initialize marketplace
// Script-scope handle to the single MarketplaceUI instance so that other
// scripts and inline handlers on the page can reach it.
let marketplace;

// DOMContentLoaded fires only once; if this script runs after the document
// has been parsed (e.g. injected or deferred late), start immediately instead
// of waiting for an event that will never come.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        marketplace = new MarketplaceUI();
    });
} else {
    marketplace = new MarketplaceUI();
}
|
||||||
147
docs/md_v2/marketplace/index.html
Normal file
147
docs/md_v2/marketplace/index.html
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
<!DOCTYPE html>
<!--
    Marketplace landing page. Every element carrying an id below is a mount
    point that marketplace.js fills in at runtime; keep the ids stable.
-->
<html lang="en" data-theme="dark">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Marketplace - Crawl4AI</title>
    <link rel="stylesheet" href="marketplace.css">
</head>
<body>
    <div class="marketplace-container">
        <!-- Header -->
        <header class="marketplace-header">
            <div class="header-content">
                <div class="header-left">
                    <div class="logo-title">
                        <img src="../assets/images/logo.png" alt="Crawl4AI" class="header-logo">
                        <h1>
                            <span class="ascii-border">[</span>
                            Marketplace
                            <span class="ascii-border">]</span>
                        </h1>
                    </div>
                    <p class="tagline">Tools, Integrations &amp; Resources for Web Crawling</p>
                </div>
                <div class="header-stats" id="stats">
                    <span class="stat-item">Apps: <span id="total-apps">--</span></span>
                    <span class="stat-item">Articles: <span id="total-articles">--</span></span>
                    <span class="stat-item">Downloads: <span id="total-downloads">--</span></span>
                </div>
            </div>
        </header>

        <!-- Search and Category Bar -->
        <div class="search-filter-bar">
            <div class="search-box">
                <span class="search-icon">&gt;</span>
                <!-- aria-label: the placeholder alone is not an accessible name -->
                <input type="text" id="search-input" aria-label="Search apps, articles, tools" placeholder="Search apps, articles, tools..." />
                <kbd>/</kbd>
            </div>
            <div class="category-filter" id="category-filter">
                <button type="button" class="filter-btn active" data-category="all">All</button>
                <!-- Categories will be loaded here -->
            </div>
        </div>

        <!-- Magazine Grid Layout -->
        <main class="magazine-layout">
            <!-- Hero Featured Section -->
            <section class="hero-featured">
                <div id="featured-hero" class="featured-hero-card">
                    <!-- Large featured card with big image -->
                </div>
            </section>

            <!-- Secondary Featured -->
            <section class="secondary-featured">
                <div id="featured-secondary" class="featured-secondary-cards">
                    <!-- 2-3 medium featured cards with images -->
                </div>
            </section>

            <!-- Sponsored Section -->
            <section class="sponsored-section">
                <div class="section-label">SPONSORED</div>
                <div id="sponsored-content" class="sponsored-cards">
                    <!-- Sponsored content cards -->
                </div>
            </section>

            <!-- Main Content Grid -->
            <section class="main-content">
                <!-- Apps Column -->
                <div class="apps-column">
                    <div class="column-header">
                        <h2><span class="ascii-icon">&gt;</span> Latest Apps</h2>
                        <select id="type-filter" class="mini-filter" aria-label="Filter apps by type">
                            <option value="">All</option>
                            <option value="Open Source">Open Source</option>
                            <option value="Paid">Paid</option>
                        </select>
                    </div>
                    <div id="apps-grid" class="apps-compact-grid">
                        <!-- Compact app cards -->
                    </div>
                </div>

                <!-- Articles Column -->
                <div class="articles-column">
                    <div class="column-header">
                        <h2><span class="ascii-icon">&gt;</span> Latest Articles</h2>
                    </div>
                    <div id="articles-list" class="articles-compact-list">
                        <!-- Article items -->
                    </div>
                </div>

                <!-- Trending/Tools Column -->
                <div class="trending-column">
                    <div class="column-header">
                        <h2><span class="ascii-icon">#</span> Trending</h2>
                    </div>
                    <div id="trending-list" class="trending-items">
                        <!-- Trending items -->
                    </div>

                    <div class="submit-box">
                        <h3><span class="ascii-icon">+</span> Submit Your Tool</h3>
                        <p>Share your integration</p>
                        <a href="mailto:marketplace@crawl4ai.com" class="submit-btn">Submit →</a>
                    </div>
                </div>
            </section>

            <!-- More Apps Grid -->
            <section class="more-apps">
                <div class="section-header">
                    <h2><span class="ascii-icon">&gt;</span> More Apps</h2>
                    <button type="button" id="load-more" class="load-more-btn">Load More ↓</button>
                </div>
                <div id="more-apps-grid" class="more-apps-grid">
                    <!-- Additional app cards -->
                </div>
            </section>
        </main>

        <!-- Footer -->
        <footer class="marketplace-footer">
            <div class="footer-content">
                <div class="footer-section">
                    <h3>About Marketplace</h3>
                    <p>Discover tools and integrations built by the Crawl4AI community.</p>
                </div>
                <div class="footer-section">
                    <h3>Become a Sponsor</h3>
                    <p>Reach developers building with Crawl4AI</p>
                    <a href="mailto:sponsors@crawl4ai.com" class="sponsor-btn">Learn More →</a>
                </div>
            </div>
            <div class="footer-bottom">
                <p>[ Crawl4AI Marketplace · Updated <span id="last-update">--</span> ]</p>
            </div>
        </footer>
    </div>

    <!-- Loaded last so the mount points above already exist when it runs. -->
    <script src="marketplace.js"></script>
</body>
</html>
|
||||||
994
docs/md_v2/marketplace/marketplace.css
Normal file
994
docs/md_v2/marketplace/marketplace.css
Normal file
@@ -0,0 +1,994 @@
|
|||||||
|
/* Marketplace CSS - Magazine Style Terminal Theme */

/*
 * Shared site stylesheet.
 * NOTE(review): the path was '../../assets/styles.css'. The index.html that
 * sits beside this file reaches the assets directory via '../assets/', so the
 * import is aligned with that; confirm against the repository layout.
 */
@import url('../assets/styles.css');

/* Theme palette exposed as custom properties for every rule below. */
:root {
    /* Brand / accent colors */
    --primary-cyan: #50ffff;
    --primary-teal: #09b5a5;
    --accent-pink: #f380f5;

    /* Surfaces */
    --bg-dark: #070708;
    --bg-secondary: #1a1a1a;
    --bg-tertiary: #3f3f44;

    /* Text */
    --text-primary: #e8e9ed;
    --text-secondary: #d5cec0;
    --text-tertiary: #a3abba;

    /* Lines and status colors */
    --border-color: #3f3f44;
    --success: #50ff50;
    --error: #ff3c74;
    --warning: #f59e0b;
}

/* Reset: no default spacing, and sizes include padding and border. */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body {
    font-family: 'Dank Mono', Monaco, monospace;
    line-height: 1.6;
    color: var(--text-primary);
    background: var(--bg-dark);
}

/* Global link styles */
a {
    color: var(--primary-cyan);
    text-decoration: none;
    transition: color 0.2s;
}

a:hover {
    color: var(--accent-pink);
}

/* Page wrapper is at least one viewport tall. */
.marketplace-container {
    min-height: 100vh;
}
|
||||||
|
|
||||||
|
/* Header */

/* Full-width band with a bottom rule. */
.marketplace-header {
    padding: 1.5rem 0;
    background: var(--bg-secondary);
    border-bottom: 1px solid var(--border-color);
}

/* Centered row, capped at 1800px: title block on one side, stats on the other. */
.header-content {
    display: flex;
    align-items: center;
    justify-content: space-between;
    max-width: 1800px;
    margin: 0 auto;
    padding: 0 2rem;
}

/* Logo and title side by side. */
.logo-title {
    display: flex;
    align-items: center;
    gap: 1rem;
}

.header-logo {
    width: auto;
    height: 40px;
    filter: brightness(1.2);
}

.marketplace-header h1 {
    margin: 0;
    font-size: 1.5rem;
    color: var(--primary-cyan);
}

/* Bracket characters around the title use the border color. */
.ascii-border {
    color: var(--border-color);
}

.tagline {
    margin-top: 0.25rem;
    font-size: 0.875rem;
    color: var(--text-tertiary);
}

/* Stats: a spaced row of "label: value" items. */
.header-stats {
    display: flex;
    gap: 2rem;
}

.stat-item {
    font-size: 0.875rem;
    color: var(--text-secondary);
}

/* The value inside each stat item is highlighted. */
.stat-item span {
    font-weight: 600;
    color: var(--primary-cyan);
}
|
||||||
|
|
||||||
|
/* Search and Filter Bar */

/* Centered row, capped at 1800px, holding the search box and category buttons. */
.search-filter-bar {
    display: flex;
    align-items: center;
    gap: 1rem;
    max-width: 1800px;
    margin: 1.5rem auto;
    padding: 0 2rem;
}

/* Bordered box containing the prompt glyph, the input and the shortcut hint. */
.search-box {
    display: flex;
    flex: 1;
    align-items: center;
    max-width: 500px;
    padding: 0.75rem 1rem;
    background: var(--bg-secondary);
    border: 1px solid var(--border-color);
    transition: border-color 0.2s;
}

/* Focus anywhere inside the box highlights its border. */
.search-box:focus-within {
    border-color: var(--primary-cyan);
}

.search-icon {
    margin-right: 1rem;
    color: var(--text-tertiary);
}

/* The input itself is chromeless; the surrounding box supplies the border. */
#search-input {
    flex: 1;
    font-family: inherit;
    font-size: 0.9rem;
    color: var(--text-primary);
    background: transparent;
    border: none;
    outline: none;
}

/* Keyboard shortcut hint. */
.search-box kbd {
    padding: 0.2rem 0.5rem;
    font-size: 0.75rem;
    color: var(--text-tertiary);
    background: var(--bg-tertiary);
    border: 1px solid var(--border-color);
}

/* Category buttons wrap onto further lines when they run out of room. */
.category-filter {
    display: flex;
    flex-wrap: wrap;
    gap: 0.5rem;
}

.filter-btn {
    padding: 0.5rem 1rem;
    font-family: inherit;
    font-size: 0.875rem;
    color: var(--text-secondary);
    background: transparent;
    border: 1px solid var(--border-color);
    cursor: pointer;
    transition: all 0.2s;
}

.filter-btn:hover {
    color: var(--primary-cyan);
    border-color: var(--primary-cyan);
}

/* Declared after :hover (equal specificity) so the active look wins on hover. */
.filter-btn.active {
    color: var(--bg-dark);
    background: var(--primary-cyan);
    border-color: var(--primary-cyan);
}
|
||||||
|
|
||||||
|
/* Magazine Layout */
|
||||||
|
.magazine-layout {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 0 2rem 4rem;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hero Featured Section */
|
||||||
|
.hero-featured {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-featured::before {
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
top: -20px;
|
||||||
|
left: -20px;
|
||||||
|
right: -20px;
|
||||||
|
bottom: -20px;
|
||||||
|
background: radial-gradient(ellipse at center, rgba(80, 255, 255, 0.05), transparent 70%);
|
||||||
|
pointer-events: none;
|
||||||
|
z-index: -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-hero-card {
|
||||||
|
background: linear-gradient(135deg, #1a1a2e, #0f0f1e);
|
||||||
|
border: 2px solid var(--primary-cyan);
|
||||||
|
box-shadow: 0 0 30px rgba(80, 255, 255, 0.15),
|
||||||
|
inset 0 0 20px rgba(80, 255, 255, 0.05);
|
||||||
|
height: 380px;
|
||||||
|
position: relative;
|
||||||
|
overflow: hidden;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-hero-card:hover {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
box-shadow: 0 0 40px rgba(243, 128, 245, 0.2),
|
||||||
|
inset 0 0 30px rgba(243, 128, 245, 0.05);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-image {
|
||||||
|
width: 100%;
|
||||||
|
height: 200px;
|
||||||
|
min-height: 200px;
|
||||||
|
max-height: 200px;
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.1), rgba(243, 128, 245, 0.05));
|
||||||
|
background-size: cover;
|
||||||
|
background-position: center;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
font-size: 3rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
flex-shrink: 0;
|
||||||
|
position: relative;
|
||||||
|
filter: brightness(1.1) contrast(1.1);
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-image img {
|
||||||
|
width: 100%;
|
||||||
|
height: 100%;
|
||||||
|
object-fit: cover;
|
||||||
|
object-position: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-image::after {
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
bottom: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
height: 60%;
|
||||||
|
background: linear-gradient(to top, rgba(10, 10, 20, 0.95), transparent);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-content {
|
||||||
|
padding: 1.5rem;
|
||||||
|
flex: 1;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
justify-content: space-between;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.3rem 0.6rem;
|
||||||
|
background: linear-gradient(135deg, var(--primary-cyan), var(--primary-teal));
|
||||||
|
color: var(--bg-dark);
|
||||||
|
font-size: 0.7rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
font-weight: 600;
|
||||||
|
box-shadow: 0 2px 10px rgba(80, 255, 255, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-title {
|
||||||
|
font-size: 1.6rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin: 0.5rem 0;
|
||||||
|
text-shadow: 0 0 20px rgba(80, 255, 255, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-description {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-meta {
|
||||||
|
display: flex;
|
||||||
|
gap: 1.5rem;
|
||||||
|
margin-top: 1rem;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-meta span {
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-meta span:first-child {
|
||||||
|
color: var(--warning);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Secondary Featured */
|
||||||
|
.secondary-featured {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
min-height: 380px;
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-start;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-secondary-cards {
|
||||||
|
width: 100%;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.75rem;
|
||||||
|
align-items: stretch;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-card {
|
||||||
|
background: linear-gradient(135deg, rgba(80, 255, 255, 0.03), rgba(243, 128, 245, 0.02));
|
||||||
|
border: 1px solid rgba(80, 255, 255, 0.3);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
display: flex;
|
||||||
|
overflow: hidden;
|
||||||
|
height: 118px;
|
||||||
|
min-height: 118px;
|
||||||
|
max-height: 118px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-card:hover {
|
||||||
|
border-color: var(--accent-pink);
|
||||||
|
background: linear-gradient(135deg, rgba(243, 128, 245, 0.05), rgba(80, 255, 255, 0.03));
|
||||||
|
box-shadow: 0 4px 15px rgba(243, 128, 245, 0.2);
|
||||||
|
transform: translateX(-3px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-image {
|
||||||
|
width: 120px;
|
||||||
|
background: linear-gradient(135deg, var(--bg-tertiary), var(--bg-secondary));
|
||||||
|
background-size: cover;
|
||||||
|
background-position: center;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
font-size: 1.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-content {
|
||||||
|
flex: 1;
|
||||||
|
padding: 1rem;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
justify-content: space-between;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-title {
|
||||||
|
font-size: 1rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-desc {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
display: -webkit-box;
|
||||||
|
-webkit-line-clamp: 2;
|
||||||
|
-webkit-box-orient: vertical;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-meta {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-meta span:last-child {
|
||||||
|
color: var(--warning);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sponsored Section */
|
||||||
|
.sponsored-section {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--warning);
|
||||||
|
padding: 1rem;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-label {
|
||||||
|
position: absolute;
|
||||||
|
top: -0.5rem;
|
||||||
|
left: 1rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
padding: 0 0.5rem;
|
||||||
|
color: var(--warning);
|
||||||
|
font-size: 0.65rem;
|
||||||
|
letter-spacing: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsored-cards {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card {
|
||||||
|
padding: 1rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-logo {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
height: 60px;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-logo img {
|
||||||
|
max-height: 60px;
|
||||||
|
max-width: 100%;
|
||||||
|
width: auto;
|
||||||
|
object-fit: contain;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card h4 {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.85rem;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card a {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-card a:hover {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Main Content Grid */
|
||||||
|
.main-content {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Column Headers */
|
||||||
|
.column-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.column-header h2 {
|
||||||
|
font-size: 1.1rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.mini-filter {
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
font-family: inherit;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ascii-icon {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Apps Column */
|
||||||
|
.apps-compact-grid {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
border-left: 3px solid var(--border-color);
|
||||||
|
padding: 0.75rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
border-left-color: var(--accent-pink);
|
||||||
|
transform: translateX(2px);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-header span:first-child {
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-header span:last-child {
|
||||||
|
color: var(--warning);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-title {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-compact-desc {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
display: -webkit-box;
|
||||||
|
-webkit-line-clamp: 2;
|
||||||
|
-webkit-box-orient: vertical;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Articles Column */
|
||||||
|
.articles-compact-list {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-compact {
|
||||||
|
border-left: 2px solid var(--border-color);
|
||||||
|
padding-left: 1rem;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-compact:hover {
|
||||||
|
border-left-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-meta {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-meta span:first-child {
|
||||||
|
color: var(--accent-pink);
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-title {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.article-author {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Trending Column */
|
||||||
|
.trending-items {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-item:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-rank {
|
||||||
|
font-size: 1.2rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
width: 2rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-info {
|
||||||
|
flex: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-name {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.trending-stats {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Submit Box */
|
||||||
|
.submit-box {
|
||||||
|
margin-top: 1.5rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
padding: 1rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-box h3 {
|
||||||
|
font-size: 1rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-box p {
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-btn {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.submit-btn:hover {
|
||||||
|
background: var(--primary-cyan);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* More Apps Section */
|
||||||
|
.more-apps {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
margin-top: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.more-apps-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.load-more-btn {
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
padding: 0.5rem 1.5rem;
|
||||||
|
font-family: inherit;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.load-more-btn:hover {
|
||||||
|
border-color: var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Footer */
|
||||||
|
.marketplace-footer {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
margin-top: 4rem;
|
||||||
|
padding: 2rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-content {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 0 2rem;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 1fr;
|
||||||
|
gap: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-section h3 {
|
||||||
|
font-size: 1rem;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-section p {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-btn {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsor-btn:hover {
|
||||||
|
background: var(--primary-cyan);
|
||||||
|
color: var(--bg-dark);
|
||||||
|
}
|
||||||
|
|
||||||
|
.footer-bottom {
|
||||||
|
max-width: 1800px;
|
||||||
|
margin: 2rem auto 0;
|
||||||
|
padding: 1rem 2rem 0;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Modal */
|
||||||
|
.modal {
|
||||||
|
position: fixed;
|
||||||
|
top: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
bottom: 0;
|
||||||
|
background: rgba(0, 0, 0, 0.8);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
z-index: 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal.hidden {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-content {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border: 1px solid var(--primary-cyan);
|
||||||
|
max-width: 800px;
|
||||||
|
width: 90%;
|
||||||
|
max-height: 80vh;
|
||||||
|
overflow-y: auto;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-close {
|
||||||
|
position: absolute;
|
||||||
|
top: 1rem;
|
||||||
|
right: 1rem;
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
color: var(--text-primary);
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 1.2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.modal-close:hover {
|
||||||
|
border-color: var(--error);
|
||||||
|
color: var(--error);
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-detail {
|
||||||
|
padding: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.app-detail h2 {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
color: var(--primary-cyan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Loading */
|
||||||
|
.loading {
|
||||||
|
text-align: center;
|
||||||
|
padding: 2rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.no-results {
|
||||||
|
text-align: center;
|
||||||
|
padding: 2rem;
|
||||||
|
color: var(--text-tertiary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive - Tablet */
|
||||||
|
@media (min-width: 768px) {
|
||||||
|
.magazine-layout {
|
||||||
|
grid-template-columns: repeat(2, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-featured {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-featured {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsored-section {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.main-content {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
grid-template-columns: repeat(2, 1fr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive - Desktop */
|
||||||
|
@media (min-width: 1024px) {
|
||||||
|
.magazine-layout {
|
||||||
|
grid-template-columns: repeat(3, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-featured {
|
||||||
|
grid-column: 1 / 3;
|
||||||
|
grid-row: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-featured {
|
||||||
|
grid-column: 3 / 4;
|
||||||
|
grid-row: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-secondary-cards {
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsored-section {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.main-content {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
grid-template-columns: repeat(3, 1fr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive - Wide Desktop */
|
||||||
|
@media (min-width: 1400px) {
|
||||||
|
.magazine-layout {
|
||||||
|
grid-template-columns: repeat(4, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-featured {
|
||||||
|
grid-column: 1 / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-featured {
|
||||||
|
grid-column: 3 / 5;
|
||||||
|
grid-row: 1;
|
||||||
|
min-height: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-secondary-cards {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(2, 1fr);
|
||||||
|
flex-direction: unset;
|
||||||
|
}
|
||||||
|
|
||||||
|
.main-content {
|
||||||
|
grid-template-columns: repeat(4, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.apps-column {
|
||||||
|
grid-column: span 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
.more-apps-grid {
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive - Ultra Wide Desktop (for coders with wide monitors) */
|
||||||
|
@media (min-width: 1800px) {
|
||||||
|
.magazine-layout {
|
||||||
|
grid-template-columns: repeat(5, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.hero-featured {
|
||||||
|
grid-column: 1 / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.secondary-featured {
|
||||||
|
grid-column: 3 / 6;
|
||||||
|
min-height: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.featured-secondary-cards {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(3, 1fr);
|
||||||
|
flex-direction: unset;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsored-section {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sponsored-cards {
|
||||||
|
grid-template-columns: repeat(5, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.main-content {
|
||||||
|
grid-template-columns: repeat(5, 1fr);
|
||||||
|
}
|
||||||
|
|
||||||
|
.apps-column {
|
||||||
|
grid-column: span 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
.articles-column {
|
||||||
|
grid-column: span 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
.more-apps-grid {
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive - Mobile */
|
||||||
|
@media (max-width: 767px) {
    /* Stack header pieces vertically on narrow screens. */
    .header-content {
        flex-direction: column;
        gap: 1rem;
    }

    /* Search box above the category buttons, both full width. */
    .search-filter-bar {
        flex-direction: column;
        align-items: stretch;
    }

    .search-box {
        max-width: none;
    }

    .magazine-layout {
        padding: 0 1rem 2rem;
    }

    .footer-content {
        grid-template-columns: 1fr;
    }

    /*
     * Secondary cards stack image-over-text here. The base rule pins the card
     * to 118px with overflow hidden, which is shorter than the 150px image
     * below and would clip the text entirely, so the fixed height is lifted.
     */
    .secondary-card {
        flex-direction: column;
        height: auto;
        min-height: 0;
        max-height: none;
    }

    .secondary-image {
        width: 100%;
        height: 150px;
    }
}
|
||||||
412
docs/md_v2/marketplace/marketplace.js
Normal file
412
docs/md_v2/marketplace/marketplace.js
Normal file
@@ -0,0 +1,412 @@
|
|||||||
|
// Marketplace JS - Magazine Layout
|
||||||
|
// Work out where the marketplace API lives. When the page is opened on
// localhost / 127.0.0.1 port 8000, requests are sent to http://127.0.0.1:8100;
// in every other case the API is addressed with a same-origin relative path.
const { API_BASE, API_ORIGIN } = (() => {
    const loc = window.location;
    const onLoopbackHost = ['localhost', '127.0.0.1'].includes(loc.hostname);
    const apiOrigin = onLoopbackHost && loc.port === '8000' ? 'http://127.0.0.1:8100' : '';
    return { API_BASE: apiOrigin + '/marketplace/api', API_ORIGIN: apiOrigin };
})();
|
||||||
|
|
||||||
|
/**
 * Turn an asset path into a URL the browser can load.
 *
 * - Empty or non-string input yields ''.
 * - Absolute http(s) URLs and protocol-relative URLs ("//host/...") are
 *   returned untouched.
 * - Root-relative paths ("/x.png") are prefixed with API_ORIGIN when one is set.
 * - Anything else is returned unchanged.
 *
 * @param {string} path
 * @returns {string}
 */
const resolveAssetUrl = (path) => {
    if (typeof path !== 'string' || path === '') return '';
    // "//cdn.example.com/x.png" already names its host; prefixing API_ORIGIN
    // would produce a broken URL, so it is treated like an absolute URL.
    if (/^(?:https?:)?\/\//i.test(path)) return path;
    if (path.startsWith('/') && API_ORIGIN) {
        return `${API_ORIGIN}${path}`;
    }
    return path;
};
|
||||||
|
const CACHE_TTL = 3600000; // 1 hour in ms

/**
 * Small time-to-live cache stored in localStorage under a fixed key prefix.
 *
 * Every operation is best-effort: if localStorage is unavailable, full, or
 * holds a corrupt entry, the cache behaves as if the entry were missing
 * instead of throwing into the caller.
 */
class MarketplaceCache {
    constructor() {
        this.prefix = 'c4ai_market_';
    }

    /**
     * Return the cached value for `key`, or null when it is absent, expired,
     * malformed, or storage cannot be read. Stale and malformed entries are
     * removed.
     */
    get(key) {
        const storageKey = this.prefix + key;
        try {
            const item = localStorage.getItem(storageKey);
            if (!item) return null;

            const data = JSON.parse(item);
            const malformed = data === null
                || typeof data !== 'object'
                || typeof data.expires !== 'number';
            if (malformed || Date.now() > data.expires) {
                localStorage.removeItem(storageKey);
                return null;
            }
            return data.value;
        } catch (error) {
            // Corrupt JSON or inaccessible storage: drop the entry if possible
            // and report a miss so the caller refetches.
            try {
                localStorage.removeItem(storageKey);
            } catch (ignored) {
                // Storage itself is unusable; nothing more can be done here.
            }
            return null;
        }
    }

    /**
     * Store `value` under `key` for `ttl` milliseconds (default CACHE_TTL).
     * Write failures (for example a full quota) are logged and swallowed.
     */
    set(key, value, ttl = CACHE_TTL) {
        const data = {
            value: value,
            expires: Date.now() + ttl
        };
        try {
            localStorage.setItem(this.prefix + key, JSON.stringify(data));
        } catch (error) {
            console.warn('Marketplace cache write skipped:', error);
        }
    }

    /** Remove every entry whose key starts with this cache's prefix. */
    clear() {
        try {
            Object.keys(localStorage)
                .filter(k => k.startsWith(this.prefix))
                .forEach(k => localStorage.removeItem(k));
        } catch (error) {
            console.warn('Marketplace cache clear skipped:', error);
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Client for the marketplace HTTP API with a localStorage-backed cache.
 * Every request method resolves to parsed JSON, or null when the request fails.
 */
class MarketplaceAPI {
    constructor() {
        this.cache = new MarketplaceCache();
        this.searchTimeout = null;
    }

    /**
     * GET `${API_BASE}${endpoint}` and return the parsed JSON body.
     *
     * @param {string} endpoint Path plus query relative to API_BASE.
     * @param {boolean} [useCache=true] When false the cache is not consulted;
     *   the fresh response is still written to the cache.
     * @returns {Promise<*>} Parsed response, or null on network/HTTP/parse error.
     */
    async fetch(endpoint, useCache = true) {
        const cacheKey = endpoint.replace(/[^\w]/g, '_');

        if (useCache) {
            const cached = this.cache.get(cacheKey);
            // A miss is reported as null (or undefined for an empty entry);
            // other falsy payloads such as 0 or '' are genuine hits.
            if (cached !== null && cached !== undefined) return cached;
        }

        let data;
        try {
            const response = await fetch(`${API_BASE}${endpoint}`);
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            data = await response.json();
        } catch (error) {
            console.error('API Error:', error);
            return null;
        }

        // Caching is best-effort and kept outside the request try-block so a
        // storage failure cannot discard a response that was fetched fine.
        try {
            this.cache.set(cacheKey, data);
        } catch (error) {
            console.warn('API cache write failed:', error);
        }
        return data;
    }

    /**
     * Serialise `params` into "?a=1&b=2" (or '' when nothing remains).
     * For plain objects, entries whose value is undefined or null are dropped
     * so they are not sent as the literal strings "undefined" / "null".
     */
    _toQueryString(params) {
        const isPlainBag = params !== null
            && typeof params === 'object'
            && !Array.isArray(params)
            && !(params instanceof URLSearchParams);
        const source = isPlainBag
            ? Object.entries(params).filter(([, value]) => value !== undefined && value !== null)
            : params;
        const query = new URLSearchParams(source).toString();
        return query ? '?' + query : '';
    }

    async getStats() {
        return this.fetch('/stats');
    }

    async getCategories() {
        return this.fetch('/categories');
    }

    /** Fetch `/apps`, passing `params` as the query string. */
    async getApps(params = {}) {
        return this.fetch(`/apps${this._toQueryString(params)}`);
    }

    /** Fetch `/articles`, passing `params` as the query string. */
    async getArticles(params = {}) {
        return this.fetch(`/articles${this._toQueryString(params)}`);
    }

    async getSponsors() {
        return this.fetch('/sponsors');
    }

    /**
     * Query `/search`, bypassing the cache on read. Resolves to {} without a
     * request when `query` is not a string or is shorter than two characters.
     */
    async search(query) {
        if (typeof query !== 'string' || query.length < 2) return {};
        return this.fetch(`/search?q=${encodeURIComponent(query)}`, false);
    }
}
|
||||||
|
|
||||||
|
class MarketplaceUI {
|
||||||
|
constructor() {
|
||||||
|
this.api = new MarketplaceAPI();
|
||||||
|
this.currentCategory = 'all';
|
||||||
|
this.currentType = '';
|
||||||
|
this.searchTimeout = null;
|
||||||
|
this.loadedApps = 10;
|
||||||
|
this.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
async init() {
|
||||||
|
await this.loadStats();
|
||||||
|
await this.loadCategories();
|
||||||
|
await this.loadFeaturedContent();
|
||||||
|
await this.loadSponsors();
|
||||||
|
await this.loadMainContent();
|
||||||
|
this.setupEventListeners();
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadStats() {
|
||||||
|
const stats = await this.api.getStats();
|
||||||
|
if (stats) {
|
||||||
|
document.getElementById('total-apps').textContent = stats.total_apps || '0';
|
||||||
|
document.getElementById('total-articles').textContent = stats.total_articles || '0';
|
||||||
|
document.getElementById('total-downloads').textContent = stats.total_downloads || '0';
|
||||||
|
document.getElementById('last-update').textContent = new Date().toLocaleDateString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadCategories() {
|
||||||
|
const categories = await this.api.getCategories();
|
||||||
|
if (!categories) return;
|
||||||
|
|
||||||
|
const filter = document.getElementById('category-filter');
|
||||||
|
categories.forEach(cat => {
|
||||||
|
const btn = document.createElement('button');
|
||||||
|
btn.className = 'filter-btn';
|
||||||
|
btn.dataset.category = cat.slug;
|
||||||
|
btn.textContent = cat.name;
|
||||||
|
btn.onclick = () => this.filterByCategory(cat.slug);
|
||||||
|
filter.appendChild(btn);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadFeaturedContent() {
|
||||||
|
// Load hero featured
|
||||||
|
const featured = await this.api.getApps({ featured: true, limit: 4 });
|
||||||
|
if (!featured || !featured.length) return;
|
||||||
|
|
||||||
|
// Hero card (first featured)
|
||||||
|
const hero = featured[0];
|
||||||
|
const heroCard = document.getElementById('featured-hero');
|
||||||
|
if (hero) {
|
||||||
|
const imageUrl = hero.image || '';
|
||||||
|
heroCard.innerHTML = `
|
||||||
|
<div class="hero-image" ${imageUrl ? `style="background-image: url('${imageUrl}')"` : ''}>
|
||||||
|
${!imageUrl ? `[${hero.category || 'APP'}]` : ''}
|
||||||
|
</div>
|
||||||
|
<div class="hero-content">
|
||||||
|
<span class="hero-badge">${hero.type || 'PAID'}</span>
|
||||||
|
<h2 class="hero-title">${hero.name}</h2>
|
||||||
|
<p class="hero-description">${hero.description}</p>
|
||||||
|
<div class="hero-meta">
|
||||||
|
<span>★ ${hero.rating || 0}/5</span>
|
||||||
|
<span>${hero.downloads || 0} downloads</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
heroCard.onclick = () => this.showAppDetail(hero);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Secondary featured cards
|
||||||
|
const secondary = document.getElementById('featured-secondary');
|
||||||
|
secondary.innerHTML = '';
|
||||||
|
if (featured.length > 1) {
|
||||||
|
featured.slice(1, 4).forEach(app => {
|
||||||
|
const card = document.createElement('div');
|
||||||
|
card.className = 'secondary-card';
|
||||||
|
const imageUrl = app.image || '';
|
||||||
|
card.innerHTML = `
|
||||||
|
<div class="secondary-image" ${imageUrl ? `style="background-image: url('${imageUrl}')"` : ''}>
|
||||||
|
${!imageUrl ? `[${app.category || 'APP'}]` : ''}
|
||||||
|
</div>
|
||||||
|
<div class="secondary-content">
|
||||||
|
<h3 class="secondary-title">${app.name}</h3>
|
||||||
|
<p class="secondary-desc">${(app.description || '').substring(0, 100)}...</p>
|
||||||
|
<div class="secondary-meta">
|
||||||
|
<span>${app.type || 'Open Source'}</span> · <span>★ ${app.rating || 0}/5</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
card.onclick = () => this.showAppDetail(app);
|
||||||
|
secondary.appendChild(card);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadSponsors() {
|
||||||
|
const sponsors = await this.api.getSponsors();
|
||||||
|
if (!sponsors || !sponsors.length) {
|
||||||
|
// Show placeholder if no sponsors
|
||||||
|
const container = document.getElementById('sponsored-content');
|
||||||
|
container.innerHTML = `
|
||||||
|
<div class="sponsor-card">
|
||||||
|
<h4>Become a Sponsor</h4>
|
||||||
|
<p>Reach thousands of developers using Crawl4AI</p>
|
||||||
|
<a href="mailto:sponsors@crawl4ai.com">Contact Us →</a>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const container = document.getElementById('sponsored-content');
|
||||||
|
container.innerHTML = sponsors.slice(0, 5).map(sponsor => `
|
||||||
|
<div class="sponsor-card">
|
||||||
|
${sponsor.logo_url ? `<div class="sponsor-logo"><img src="${resolveAssetUrl(sponsor.logo_url)}" alt="${sponsor.company_name} logo"></div>` : ''}
|
||||||
|
<h4>${sponsor.company_name}</h4>
|
||||||
|
<p>${sponsor.tier} Sponsor - Premium Solutions</p>
|
||||||
|
<a href="${sponsor.landing_url}" target="_blank">Learn More →</a>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadMainContent() {
|
||||||
|
// Load apps column
|
||||||
|
const apps = await this.api.getApps({ limit: 8 });
|
||||||
|
if (apps && apps.length) {
|
||||||
|
const appsGrid = document.getElementById('apps-grid');
|
||||||
|
appsGrid.innerHTML = apps.map(app => `
|
||||||
|
<div class="app-compact" onclick="marketplace.showAppDetail(${JSON.stringify(app).replace(/"/g, '"')})">
|
||||||
|
<div class="app-compact-header">
|
||||||
|
<span>${app.category}</span>
|
||||||
|
<span>★ ${app.rating}/5</span>
|
||||||
|
</div>
|
||||||
|
<div class="app-compact-title">${app.name}</div>
|
||||||
|
<div class="app-compact-desc">${app.description}</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load articles column
|
||||||
|
const articles = await this.api.getArticles({ limit: 6 });
|
||||||
|
if (articles && articles.length) {
|
||||||
|
const articlesList = document.getElementById('articles-list');
|
||||||
|
articlesList.innerHTML = articles.map(article => `
|
||||||
|
<div class="article-compact" onclick="marketplace.showArticle('${article.id}')">
|
||||||
|
<div class="article-meta">
|
||||||
|
<span>${article.category}</span> · <span>${new Date(article.published_at).toLocaleDateString()}</span>
|
||||||
|
</div>
|
||||||
|
<div class="article-title">${article.title}</div>
|
||||||
|
<div class="article-author">by ${article.author}</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load trending
|
||||||
|
if (apps && apps.length) {
|
||||||
|
const trending = apps.slice(0, 5);
|
||||||
|
const trendingList = document.getElementById('trending-list');
|
||||||
|
trendingList.innerHTML = trending.map((app, i) => `
|
||||||
|
<div class="trending-item" onclick="marketplace.showAppDetail(${JSON.stringify(app).replace(/"/g, '"')})">
|
||||||
|
<div class="trending-rank">${i + 1}</div>
|
||||||
|
<div class="trending-info">
|
||||||
|
<div class="trending-name">${app.name}</div>
|
||||||
|
<div class="trending-stats">${app.downloads} downloads</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load more apps grid
|
||||||
|
const moreApps = await this.api.getApps({ offset: 8, limit: 12 });
|
||||||
|
if (moreApps && moreApps.length) {
|
||||||
|
const moreGrid = document.getElementById('more-apps-grid');
|
||||||
|
moreGrid.innerHTML = moreApps.map(app => `
|
||||||
|
<div class="app-compact" onclick="marketplace.showAppDetail(${JSON.stringify(app).replace(/"/g, '"')})">
|
||||||
|
<div class="app-compact-header">
|
||||||
|
<span>${app.category}</span>
|
||||||
|
<span>${app.type}</span>
|
||||||
|
</div>
|
||||||
|
<div class="app-compact-title">${app.name}</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setupEventListeners() {
|
||||||
|
// Search
|
||||||
|
const searchInput = document.getElementById('search-input');
|
||||||
|
searchInput.addEventListener('input', (e) => {
|
||||||
|
clearTimeout(this.searchTimeout);
|
||||||
|
this.searchTimeout = setTimeout(() => this.search(e.target.value), 300);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Keyboard shortcut
|
||||||
|
document.addEventListener('keydown', (e) => {
|
||||||
|
if (e.key === '/' && !searchInput.contains(document.activeElement)) {
|
||||||
|
e.preventDefault();
|
||||||
|
searchInput.focus();
|
||||||
|
}
|
||||||
|
if (e.key === 'Escape' && searchInput.contains(document.activeElement)) {
|
||||||
|
searchInput.blur();
|
||||||
|
searchInput.value = '';
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Type filter
|
||||||
|
const typeFilter = document.getElementById('type-filter');
|
||||||
|
typeFilter.addEventListener('change', (e) => {
|
||||||
|
this.currentType = e.target.value;
|
||||||
|
this.loadMainContent();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Load more
|
||||||
|
const loadMore = document.getElementById('load-more');
|
||||||
|
loadMore.addEventListener('click', () => this.loadMoreApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
async filterByCategory(category) {
|
||||||
|
// Update active state
|
||||||
|
document.querySelectorAll('.filter-btn').forEach(btn => {
|
||||||
|
btn.classList.toggle('active', btn.dataset.category === category);
|
||||||
|
});
|
||||||
|
|
||||||
|
this.currentCategory = category;
|
||||||
|
await this.loadMainContent();
|
||||||
|
}
|
||||||
|
|
||||||
|
async search(query) {
|
||||||
|
if (!query) {
|
||||||
|
await this.loadMainContent();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const results = await this.api.search(query);
|
||||||
|
if (!results) return;
|
||||||
|
|
||||||
|
// Update apps grid with search results
|
||||||
|
if (results.apps && results.apps.length) {
|
||||||
|
const appsGrid = document.getElementById('apps-grid');
|
||||||
|
appsGrid.innerHTML = results.apps.map(app => `
|
||||||
|
<div class="app-compact" onclick="marketplace.showAppDetail(${JSON.stringify(app).replace(/"/g, '"')})">
|
||||||
|
<div class="app-compact-header">
|
||||||
|
<span>${app.category}</span>
|
||||||
|
<span>★ ${app.rating}/5</span>
|
||||||
|
</div>
|
||||||
|
<div class="app-compact-title">${app.name}</div>
|
||||||
|
<div class="app-compact-desc">${app.description}</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update articles with search results
|
||||||
|
if (results.articles && results.articles.length) {
|
||||||
|
const articlesList = document.getElementById('articles-list');
|
||||||
|
articlesList.innerHTML = results.articles.map(article => `
|
||||||
|
<div class="article-compact" onclick="marketplace.showArticle('${article.id}')">
|
||||||
|
<div class="article-meta">
|
||||||
|
<span>${article.category}</span> · <span>${new Date(article.published_at).toLocaleDateString()}</span>
|
||||||
|
</div>
|
||||||
|
<div class="article-title">${article.title}</div>
|
||||||
|
<div class="article-author">by ${article.author}</div>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async loadMoreApps() {
|
||||||
|
this.loadedApps += 12;
|
||||||
|
const moreApps = await this.api.getApps({ offset: this.loadedApps, limit: 12 });
|
||||||
|
if (moreApps && moreApps.length) {
|
||||||
|
const moreGrid = document.getElementById('more-apps-grid');
|
||||||
|
moreApps.forEach(app => {
|
||||||
|
const card = document.createElement('div');
|
||||||
|
card.className = 'app-compact';
|
||||||
|
card.innerHTML = `
|
||||||
|
<div class="app-compact-header">
|
||||||
|
<span>${app.category}</span>
|
||||||
|
<span>${app.type}</span>
|
||||||
|
</div>
|
||||||
|
<div class="app-compact-title">${app.name}</div>
|
||||||
|
`;
|
||||||
|
card.onclick = () => this.showAppDetail(app);
|
||||||
|
moreGrid.appendChild(card);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
showAppDetail(app) {
|
||||||
|
// Navigate to detail page instead of showing modal
|
||||||
|
const slug = app.slug || app.name.toLowerCase().replace(/\s+/g, '-');
|
||||||
|
window.location.href = `app-detail.html?app=${slug}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
showArticle(articleId) {
|
||||||
|
// Could create article detail page similarly
|
||||||
|
console.log('Show article:', articleId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize marketplace.
// `marketplace` stays a top-level binding so other scripts and inline
// handlers can reach the instance.
let marketplace;

const initMarketplace = () => {
    marketplace = new MarketplaceUI();
};

if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initMarketplace, { once: true });
} else {
    // The script ran after DOMContentLoaded already fired (e.g. it was
    // loaded async or injected late), so the event will never arrive.
    initMarketplace();
}
|
||||||
338
docs/releases_review/demo_v0.7.5.py
Normal file
338
docs/releases_review/demo_v0.7.5.py
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
"""
|
||||||
|
🚀 Crawl4AI v0.7.5 Release Demo - Working Examples
|
||||||
|
==================================================
|
||||||
|
This demo showcases key features introduced in v0.7.5 with real, executable examples.
|
||||||
|
|
||||||
|
Featured Demos:
|
||||||
|
1. ✅ Docker Hooks System - Real API calls with custom hooks (string & function-based)
|
||||||
|
2. ✅ Enhanced LLM Integration - Working LLM configurations
|
||||||
|
3. ✅ HTTPS Preservation - Live crawling with HTTPS maintenance
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- crawl4ai v0.7.5 installed
|
||||||
|
- Docker running with crawl4ai image (optional for Docker demos)
|
||||||
|
- Valid API keys for LLM demos (optional)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from crawl4ai import (AsyncWebCrawler, CrawlerRunConfig, BrowserConfig,
|
||||||
|
CacheMode, FilterChain, URLPatternFilter, BFSDeepCrawlStrategy,
|
||||||
|
hooks_to_string)
|
||||||
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
|
||||||
|
def print_section(title: str, description: str = ""):
    """Print a banner: a 60-char rule, the title, an optional description, a rule.

    Args:
        title: Heading shown under the top rule.
        description: Optional second line; skipped when empty.
    """
    rule = "=" * 60
    banner = [f"\n{rule}", title]
    if description:
        banner.append(description)
    banner.append(f"{rule}\n")
    for line in banner:
        print(line)
|
||||||
|
|
||||||
|
|
||||||
|
async def demo_1_docker_hooks_system():
    """Demo 1: Docker Hooks System - real API calls with custom hooks.

    Part 1 posts string-based hook source to the REST ``/crawl`` endpoint.
    Part 2 defines the same kind of hooks as regular async functions,
    converts them with ``hooks_to_string`` and then passes the function
    objects straight to ``Crawl4aiDockerClient.crawl``.

    Returns early with setup instructions when no service answers on
    localhost:11235. Request and crawl errors are printed, not raised.
    """
    print_section(
        "Demo 1: Docker Hooks System",
        "Testing both string-based and function-based hooks (NEW in v0.7.5!)"
    )

    # Check Docker service availability
    def check_docker_service():
        """Return True when the service root answers with HTTP 200."""
        try:
            response = requests.get("http://localhost:11235/", timeout=3)
            return response.status_code == 200
        except requests.RequestException:
            # Connection refused, timeout, etc. all mean "not available".
            return False

    print("Checking Docker service...")
    docker_running = check_docker_service()

    if not docker_running:
        print("⚠️ Docker service not running on localhost:11235")
        print("To test Docker hooks:")
        print("1. Run: docker run -p 11235:11235 unclecode/crawl4ai:latest")
        print("2. Wait for service to start")
        print("3. Re-run this demo\n")
        return

    print("✓ Docker service detected!")

    # ============================================================================
    # PART 1: Traditional String-Based Hooks (Works with REST API)
    # ============================================================================
    print("\n" + "─" * 60)
    print("Part 1: String-Based Hooks (REST API)")
    print("─" * 60)

    # The hook source is sent as text, so each string must itself be valid,
    # correctly indented Python.
    hooks_config_string = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print("[String Hook] Setting up page context")
    await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
    return page
""",
        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    print("[String Hook] Before retrieving HTML")
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(1000)
    return page
"""
    }

    payload = {
        "urls": ["https://httpbin.org/html"],
        "hooks": {
            "code": hooks_config_string,
            "timeout": 30
        }
    }

    print("🔧 Using string-based hooks for REST API...")
    try:
        start_time = time.time()
        response = requests.post("http://localhost:11235/crawl", json=payload, timeout=60)
        execution_time = time.time() - start_time

        if response.status_code == 200:
            result = response.json()
            print(f"✅ String-based hooks executed in {execution_time:.2f}s")
            if result.get('results') and result['results'][0].get('success'):
                html_length = len(result['results'][0].get('html', ''))
                print(f" 📄 HTML length: {html_length} characters")
        else:
            print(f"❌ Request failed: {response.status_code}")
    except Exception as e:
        print(f"❌ Error: {str(e)}")

    # ============================================================================
    # PART 2: NEW Function-Based Hooks with Docker Client (v0.7.5)
    # ============================================================================
    print("\n" + "─" * 60)
    print("Part 2: Function-Based Hooks with Docker Client (✨ NEW!)")
    print("─" * 60)

    # Define hooks as regular Python functions
    async def on_page_context_created_func(page, context, **kwargs):
        """Block images to speed up crawling"""
        print("[Function Hook] Setting up page context")
        await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
        await page.set_viewport_size({"width": 1920, "height": 1080})
        return page

    async def before_goto_func(page, context, url, **kwargs):
        """Add custom headers before navigation"""
        print(f"[Function Hook] About to navigate to {url}")
        await page.set_extra_http_headers({
            'X-Crawl4AI': 'v0.7.5-function-hooks',
            'X-Test-Header': 'demo'
        })
        return page

    async def before_retrieve_html_func(page, context, **kwargs):
        """Scroll to load lazy content"""
        print("[Function Hook] Scrolling page for lazy-loaded content")
        await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        await page.wait_for_timeout(500)
        await page.evaluate("window.scrollTo(0, 0)")
        return page

    # Use the hooks_to_string utility (can be used standalone)
    print("\n📦 Converting functions to strings with hooks_to_string()...")
    hooks_as_strings = hooks_to_string({
        "on_page_context_created": on_page_context_created_func,
        "before_goto": before_goto_func,
        "before_retrieve_html": before_retrieve_html_func
    })
    print(f" ✓ Converted {len(hooks_as_strings)} hooks to string format")

    # OR use Docker Client which does conversion automatically!
    print("\n🐳 Using Docker Client with automatic conversion...")
    try:
        # The context manager closes the client's HTTP session on exit.
        async with Crawl4aiDockerClient(base_url="http://localhost:11235") as client:
            # Pass function objects directly - conversion happens automatically!
            results = await client.crawl(
                urls=["https://httpbin.org/html"],
                hooks={
                    "on_page_context_created": on_page_context_created_func,
                    "before_goto": before_goto_func,
                    "before_retrieve_html": before_retrieve_html_func
                },
                hooks_timeout=30
            )

        if results and results.success:
            print("✅ Function-based hooks executed successfully!")
            print(f" 📄 HTML length: {len(results.html)} characters")
            print(f" 🎯 URL: {results.url}")
        else:
            print("⚠️ Crawl completed but may have warnings")

    except Exception as e:
        print(f"❌ Docker client error: {str(e)}")

    # Show the benefits
    print("\n" + "=" * 60)
    print("✨ Benefits of Function-Based Hooks:")
    print("=" * 60)
    print("✓ Full IDE support (autocomplete, syntax highlighting)")
    print("✓ Type checking and linting")
    print("✓ Easier to test and debug")
    print("✓ Reusable across projects")
    print("✓ Automatic conversion in Docker client")
    print("=" * 60)
|
||||||
|
|
||||||
|
|
||||||
|
async def demo_2_enhanced_llm_integration():
    """Demo 2: Enhanced LLM Integration - working LLM configurations.

    Posts a request to the local ``/md`` endpoint with an explicit LLM
    provider and temperature, then prints the response keys and markdown
    length, or the failure status and body. Errors are printed, not
    raised.
    """
    print_section(
        "Demo 2: Enhanced LLM Integration",
        "Testing custom LLM providers and configurations"
    )

    print("🤖 Testing Enhanced LLM Integration Features")

    provider = "gemini/gemini-2.5-flash-lite"
    payload = {
        "url": "https://example.com",
        "f": "llm",
        "q": "Summarize this page in one sentence.",
        "provider": provider,  # Explicitly set provider
        "temperature": 0.7
    }
    try:
        response = requests.post(
            "http://localhost:11235/md",
            json=payload,
            timeout=60
        )
        if response.status_code == 200:
            result = response.json()
            print(f"✓ Request successful with provider: {provider}")
            print(f" - Response keys: {list(result.keys())}")
            print(f" - Content length: {len(result.get('markdown', ''))} characters")
            print(" - Note: Actual LLM call may fail without valid API key")
        else:
            print(f"❌ Request failed: {response.status_code}")
            print(f" - Response: {response.text[:500]}")

    except Exception as e:
        # Plain print() does not interpret rich-style "[red]...[/]" markup,
        # so report errors the same way the other demos do.
        print(f"❌ Error: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
async def demo_3_https_preservation():
    """Demo 3: HTTPS Preservation - live crawling with HTTPS maintenance.

    Runs a small streamed BFS deep crawl (depth 2, at most 5 pages) of
    quotes.toscrape.com with ``preserve_https_for_internal_links=True``
    and prints the internal links reported for every crawled page.
    """
    print_section(
        "Demo 3: HTTPS Preservation",
        "Testing HTTPS preservation for internal links"
    )

    print("🔒 Testing HTTPS Preservation Feature")

    # Test with HTTPS preservation enabled
    print("\nTest 1: HTTPS Preservation ENABLED")

    url_filter = URLPatternFilter(
        # Raw string: "\/" and "\." are invalid escapes in a normal literal
        # and trigger a SyntaxWarning on recent Python versions.
        patterns=[r"^(https:\/\/)?quotes\.toscrape\.com(\/.*)?$"]
    )
    config = CrawlerRunConfig(
        exclude_external_links=True,
        stream=True,
        verbose=False,
        preserve_https_for_internal_links=True,
        deep_crawl_strategy=BFSDeepCrawlStrategy(
            max_depth=2,
            max_pages=5,
            filter_chain=FilterChain([url_filter])
        )
    )

    test_url = "https://quotes.toscrape.com"
    print(f"🎯 Testing URL: {test_url}")

    async with AsyncWebCrawler() as crawler:
        # The awaited arun() result is consumed page by page with async for.
        async for result in await crawler.arun(url=test_url, config=config):
            print("✓ HTTPS Preservation Test Completed")
            internal_links = [i['href'] for i in result.links['internal']]
            for link in internal_links:
                print(f" → {link}")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Run every demo in order, pausing for Enter between demos.

    A failing demo is reported and the run moves on to the next one;
    Ctrl+C stops the sequence. A closing feature summary is always
    printed.
    """
    rule = "=" * 60

    print("\n" + rule)
    print("🚀 Crawl4AI v0.7.5 Working Demo")
    print(rule)

    # Check system requirements
    print("🔍 System Requirements Check:")
    python_ok = sys.version_info >= (3, 10)
    python_mark = '✓' if python_ok else '❌ (3.10+ required)'
    print(f" - Python version: {sys.version.split()[0]} {python_mark}")

    try:
        import requests
        print(" - Requests library: ✓")
    except ImportError:
        print(" - Requests library: ❌")

    print()

    demos = [
        ("Docker Hooks System", demo_1_docker_hooks_system),
        ("Enhanced LLM Integration", demo_2_enhanced_llm_integration),
        ("HTTPS Preservation", demo_3_https_preservation),
    ]
    total = len(demos)

    for index, (name, demo_func) in enumerate(demos, 1):
        try:
            print(f"\n📍 Starting Demo {index}/{total}: {name}")
            await demo_func()

            # No pause after the final demo.
            if index < total:
                print(f"\n✨ Demo {index} complete! Press Enter for next demo...")
                input()

        except KeyboardInterrupt:
            print("\n⏹️ Demo interrupted by user")
            break
        except Exception as e:
            print(f"❌ Demo {index} error: {str(e)}")
            print("Continuing to next demo...")

    closing_lines = (
        "\n" + rule,
        "🎉 Demo Complete!",
        rule,
        "You've experienced the power of Crawl4AI v0.7.5!",
        "",
        "Key Features Demonstrated:",
        "🔧 Docker Hooks - String-based & function-based (NEW!)",
        " • hooks_to_string() utility for function conversion",
        " • Docker client with automatic conversion",
        " • Full IDE support and type checking",
        "🤖 Enhanced LLM - Better AI integration",
        "🔒 HTTPS Preservation - Secure link handling",
        "",
        "Ready to build something amazing? 🚀",
        "",
        "📖 Docs: https://docs.crawl4ai.com/",
        "🐙 GitHub: https://github.com/unclecode/crawl4ai",
        rule,
    )
    for line in closing_lines:
        print(line)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the async demo suite and report how it ended.
if __name__ == "__main__":
    print("🚀 Crawl4AI v0.7.5 Live Demo Starting...")
    print("Press Ctrl+C anytime to exit\n")

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is an expected way to leave the demo.
        print("\n👋 Demo stopped by user. Thanks for trying Crawl4AI v0.7.5!")
    except Exception as exc:
        print(f"\n❌ Demo error: {exc!s}")
        print("Make sure you have the required dependencies installed.")
|
||||||
359
docs/releases_review/demo_v0.7.6.py
Normal file
359
docs/releases_review/demo_v0.7.6.py
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Crawl4AI v0.7.6 Release Demo
|
||||||
|
============================
|
||||||
|
|
||||||
|
This demo showcases the major feature in v0.7.6:
|
||||||
|
**Webhook Support for Docker Job Queue API**
|
||||||
|
|
||||||
|
Features Demonstrated:
|
||||||
|
1. Asynchronous job processing with webhook notifications
|
||||||
|
2. Webhook support for /crawl/job endpoint
|
||||||
|
3. Webhook support for /llm/job endpoint
|
||||||
|
4. Notification-only vs data-in-payload modes
|
||||||
|
5. Custom webhook headers for authentication
|
||||||
|
6. Structured extraction with JSON schemas
|
||||||
|
7. Exponential backoff retry for reliable delivery
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
- Crawl4AI Docker container running on localhost:11235
|
||||||
|
- Flask installed: pip install flask requests
|
||||||
|
- LLM API key configured (for LLM examples)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python docs/releases_review/demo_v0.7.6.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
# Configuration: where the Crawl4AI service listens and where this
# script's webhook receiver is reachable.
CRAWL4AI_BASE_URL = "http://localhost:11235"
WEBHOOK_BASE_URL = "http://localhost:8080"

# Flask app for webhook receiver
app = Flask(__name__)

# Payloads appended by webhook_handler and read by print_summary.
received_webhooks = []
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/webhook', methods=['POST'])
def webhook_handler():
    """Universal webhook handler for both crawl and LLM extraction jobs.

    Prints a summary of the notification and records the payload in
    ``received_webhooks``.

    Returns:
        ``({"status": "received"}, 200)`` for a well-formed notification;
        a 400 JSON error when the body is not a JSON object or lacks
        ``task_id``, ``task_type`` or ``status`` (previously such requests
        raised and produced a 500).
    """
    # silent=True yields None instead of raising on a bad or missing body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"status": "error", "message": "Expected a JSON object"}), 400

    missing = [key for key in ('task_id', 'task_type', 'status') if key not in payload]
    if missing:
        return jsonify({
            "status": "error",
            "message": f"Missing field(s): {', '.join(missing)}"
        }), 400

    task_id = payload['task_id']
    task_type = payload['task_type']
    status = payload['status']

    print(f"\n{'='*70}")
    print("📬 Webhook Received!")
    print(f" Task ID: {task_id}")
    print(f" Task Type: {task_type}")
    print(f" Status: {status}")
    print(f" Timestamp: {payload.get('timestamp', 'Unknown')}")

    if status == 'completed':
        if 'data' in payload:
            # Tolerate an explicit null "data" value.
            data = payload['data'] or {}
            print(" ✅ Data included in webhook")
            if task_type == 'crawl':
                results = data.get('results', [])
                print(f" 📊 Crawled {len(results)} URL(s)")
            elif task_type == 'llm_extraction':
                extracted = data.get('extracted_content', {})
                print(f" 🤖 Extracted: {json.dumps(extracted, indent=6)}")
        else:
            print(" 📥 Notification only (fetch data separately)")
    elif status == 'failed':
        print(f" ❌ Error: {payload.get('error', 'Unknown')}")

    print(f"{'='*70}\n")
    received_webhooks.append(payload)

    return jsonify({"status": "received"}), 200
|
||||||
|
|
||||||
|
|
||||||
|
def start_webhook_server():
    """Run the Flask webhook receiver on port 8080 (blocking call).

    The reloader is disabled, as is debug mode.
    """
    server_options = {
        "host": '0.0.0.0',
        "port": 8080,
        "debug": False,
        "use_reloader": False,
    }
    app.run(**server_options)
|
||||||
|
|
||||||
|
|
||||||
|
def demo_1_crawl_webhook_notification_only():
    """Demo 1: Crawl job with webhook notification (data fetched separately).

    Returns:
        The submitted job's task id, or ``None`` when the submission is
        rejected or the service cannot be reached.
    """
    print("\n" + "="*70)
    print("DEMO 1: Crawl Job - Webhook Notification Only")
    print("="*70)
    print("Submitting crawl job with webhook notification...")

    payload = {
        "urls": ["https://example.com"],
        "browser_config": {"headless": True},
        "crawler_config": {"cache_mode": "bypass"},
        "webhook_config": {
            "webhook_url": f"{WEBHOOK_BASE_URL}/webhook",
            "webhook_data_in_payload": False,
            "webhook_headers": {
                "X-Demo": "v0.7.6",
                "X-Type": "crawl"
            }
        }
    }

    try:
        # Without a timeout, requests can wait forever on a stalled server.
        response = requests.post(f"{CRAWL4AI_BASE_URL}/crawl/job", json=payload, timeout=30)
    except requests.RequestException as exc:
        # Unreachable service: report it like any other failed submission.
        print(f"❌ Failed: {exc}")
        return None

    if response.ok:
        task_id = response.json()['task_id']
        print(f"✅ Job submitted: {task_id}")
        print("⏳ Webhook will notify when complete...")
        return task_id

    print(f"❌ Failed: {response.text}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def demo_2_crawl_webhook_with_data():
    """Demo 2: Crawl job with full data in webhook payload.

    Returns:
        The submitted job's task id, or ``None`` when the submission is
        rejected or the service cannot be reached.
    """
    print("\n" + "="*70)
    print("DEMO 2: Crawl Job - Webhook with Full Data")
    print("="*70)
    print("Submitting crawl job with data included in webhook...")

    payload = {
        "urls": ["https://www.python.org"],
        "browser_config": {"headless": True},
        "crawler_config": {"cache_mode": "bypass"},
        "webhook_config": {
            "webhook_url": f"{WEBHOOK_BASE_URL}/webhook",
            "webhook_data_in_payload": True,
            "webhook_headers": {
                "X-Demo": "v0.7.6",
                "X-Type": "crawl-with-data"
            }
        }
    }

    try:
        # Without a timeout, requests can wait forever on a stalled server.
        response = requests.post(f"{CRAWL4AI_BASE_URL}/crawl/job", json=payload, timeout=30)
    except requests.RequestException as exc:
        # Unreachable service: report it like any other failed submission.
        print(f"❌ Failed: {exc}")
        return None

    if response.ok:
        task_id = response.json()['task_id']
        print(f"✅ Job submitted: {task_id}")
        print("⏳ Webhook will include full results...")
        return task_id

    print(f"❌ Failed: {response.text}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def demo_3_llm_webhook_notification_only():
    """Demo 3: LLM extraction with webhook notification (NEW in v0.7.6!).

    Returns:
        The submitted job's task id, or ``None`` when the submission is
        rejected or the service cannot be reached.
    """
    print("\n" + "="*70)
    print("DEMO 3: LLM Extraction - Webhook Notification Only (NEW!)")
    print("="*70)
    print("Submitting LLM extraction job with webhook notification...")

    payload = {
        "url": "https://www.example.com",
        "q": "Extract the main heading and description from this page",
        "provider": "openai/gpt-4o-mini",
        "cache": False,
        "webhook_config": {
            "webhook_url": f"{WEBHOOK_BASE_URL}/webhook",
            "webhook_data_in_payload": False,
            "webhook_headers": {
                "X-Demo": "v0.7.6",
                "X-Type": "llm"
            }
        }
    }

    try:
        # Without a timeout, requests can wait forever on a stalled server.
        response = requests.post(f"{CRAWL4AI_BASE_URL}/llm/job", json=payload, timeout=30)
    except requests.RequestException as exc:
        # Unreachable service: report it like any other failed submission.
        print(f"❌ Failed: {exc}")
        return None

    if response.ok:
        task_id = response.json()['task_id']
        print(f"✅ Job submitted: {task_id}")
        print("⏳ Webhook will notify when LLM extraction completes...")
        return task_id

    print(f"❌ Failed: {response.text}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def demo_4_llm_webhook_with_schema():
    """Demo 4: LLM extraction with JSON schema and data in webhook (NEW in v0.7.6!).

    The schema is sent as a JSON-encoded string in the ``schema`` field.

    Returns:
        The submitted job's task id, or ``None`` when the submission is
        rejected or the service cannot be reached.
    """
    print("\n" + "="*70)
    print("DEMO 4: LLM Extraction - Schema + Full Data in Webhook (NEW!)")
    print("="*70)
    print("Submitting LLM extraction with JSON schema...")

    schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string", "description": "Page title"},
            "description": {"type": "string", "description": "Page description"},
            "main_topics": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Main topics covered"
            }
        },
        "required": ["title"]
    }

    payload = {
        "url": "https://www.python.org",
        "q": "Extract the title, description, and main topics from this website",
        "schema": json.dumps(schema),
        "provider": "openai/gpt-4o-mini",
        "cache": False,
        "webhook_config": {
            "webhook_url": f"{WEBHOOK_BASE_URL}/webhook",
            "webhook_data_in_payload": True,
            "webhook_headers": {
                "X-Demo": "v0.7.6",
                "X-Type": "llm-with-schema"
            }
        }
    }

    try:
        # Without a timeout, requests can wait forever on a stalled server.
        response = requests.post(f"{CRAWL4AI_BASE_URL}/llm/job", json=payload, timeout=30)
    except requests.RequestException as exc:
        # Unreachable service: report it like any other failed submission.
        print(f"❌ Failed: {exc}")
        return None

    if response.ok:
        task_id = response.json()['task_id']
        print(f"✅ Job submitted: {task_id}")
        print("⏳ Webhook will include structured extraction results...")
        return task_id

    print(f"❌ Failed: {response.text}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def demo_5_global_webhook_config():
    """Demo 5: Using global webhook configuration from config.yml.

    Prints an example ``webhooks`` section. No request is made.
    """
    print("\n" + "="*70)
    print("DEMO 5: Global Webhook Configuration")
    print("="*70)
    print("💡 You can configure a default webhook URL in config.yml:")

    # Nesting matters in YAML: the settings sit under "webhooks" and the
    # retry parameters under "retry", so the indentation is spelled out.
    config_example = "\n".join([
        "webhooks:",
        "  enabled: true",
        '  default_url: "https://myapp.com/webhooks/default"',
        "  data_in_payload: false",
        "  retry:",
        "    max_attempts: 5",
        "    initial_delay_ms: 1000",
        "    max_delay_ms: 32000",
        "    timeout_ms: 30000",
    ])
    print(f"\n{config_example}\n")

    print("Then submit jobs WITHOUT webhook_config - they'll use the default!")
    print("This is useful for consistent webhook handling across all jobs.")
|
||||||
|
|
||||||
|
|
||||||
|
def demo_6_webhook_retry_logic():
|
||||||
|
"""Demo 6: Webhook retry mechanism with exponential backoff."""
|
||||||
|
print("\n" + "="*70)
|
||||||
|
print("DEMO 6: Webhook Retry Logic")
|
||||||
|
print("="*70)
|
||||||
|
print("🔄 Webhook delivery uses exponential backoff retry:")
|
||||||
|
print(" • Max attempts: 5")
|
||||||
|
print(" • Delays: 1s → 2s → 4s → 8s → 16s")
|
||||||
|
print(" • Timeout: 30s per attempt")
|
||||||
|
print(" • Retries on: 5xx errors, network errors, timeouts")
|
||||||
|
print(" • No retry on: 4xx client errors")
|
||||||
|
print("\nThis ensures reliable webhook delivery even with temporary failures!")
|
||||||
|
|
||||||
|
|
||||||
|
def print_summary():
    """Print demo summary and results.

    Reads the module-level ``received_webhooks`` collection and prints the
    total count, a crawl / LLM-extraction breakdown, one line per webhook,
    and a static list of the features demonstrated.

    Payload fields are read with tolerant lookups so that one malformed
    webhook cannot abort the summary.
    """
    # NOTE(review): assumes each entry is a dict-like JSON payload with
    # 'task_type', 'task_id' and 'status' keys - confirm against the handler
    # that fills received_webhooks.
    print("\n" + "="*70)
    print("📊 DEMO SUMMARY")
    print("="*70)
    print(f"Total webhooks received: {len(received_webhooks)}")

    crawl_webhooks = [w for w in received_webhooks if w.get('task_type') == 'crawl']
    llm_webhooks = [w for w in received_webhooks if w.get('task_type') == 'llm_extraction']

    print("\nBreakdown:")
    print(f" 🕷️ Crawl jobs: {len(crawl_webhooks)}")
    print(f" 🤖 LLM extraction jobs: {len(llm_webhooks)}")

    print("\nDetails:")
    for i, webhook in enumerate(received_webhooks, 1):
        icon = "🕷️" if webhook.get('task_type') == 'crawl' else "🤖"
        task_id = webhook.get('task_id', 'unknown')
        status = webhook.get('status', 'unknown')
        print(f" {i}. {icon} {task_id}: {status}")

    print("\n" + "="*70)
    print("✨ v0.7.6 KEY FEATURES DEMONSTRATED:")
    print("="*70)
    print("✅ Webhook support for /crawl/job")
    print("✅ Webhook support for /llm/job (NEW!)")
    print("✅ Notification-only mode (fetch data separately)")
    print("✅ Data-in-payload mode (get full results in webhook)")
    print("✅ Custom headers for authentication")
    print("✅ JSON schema for structured LLM extraction")
    print("✅ Exponential backoff retry for reliable delivery")
    print("✅ Global webhook configuration support")
    print("✅ Universal webhook handler for both job types")
    print("\n💡 Benefits:")
    print(" • No more polling - get instant notifications")
    print(" • Better resource utilization")
    print(" • Reliable delivery with automatic retries")
    print(" • Consistent API across crawl and LLM jobs")
    print(" • Production-ready webhook infrastructure")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run all demos.

    Steps:
      1. Probe the Crawl4AI ``/health`` endpoint and stop with start-up
         instructions if it is unreachable or answers with an error status.
      2. Start the local webhook receiver on a daemon thread.
      3. Submit the four job demos, pausing 5 seconds after each.
      4. Print the two informational demos, wait 30 seconds for outstanding
         webhooks, then print the summary and closing pointers.
    """
    print("\n" + "="*70)
    print("🚀 Crawl4AI v0.7.6 Release Demo")
    print("="*70)
    print("Feature: Webhook Support for Docker Job Queue API")
    print("="*70)

    # Check if server is running. Only transport/HTTP failures are treated
    # as "not running"; a bare except would also hide Ctrl+C and real bugs.
    try:
        health = requests.get(f"{CRAWL4AI_BASE_URL}/health", timeout=5)
        # A reachable server that answers 4xx/5xx is not usable either.
        health.raise_for_status()
    except requests.exceptions.RequestException:
        print(f"❌ Cannot connect to Crawl4AI at {CRAWL4AI_BASE_URL}")
        print("Please start Docker container:")
        print(" docker run -d -p 11235:11235 --env-file .llm.env unclecode/crawl4ai:0.7.6")
        return
    print("✅ Crawl4AI server is running")

    # Start webhook server. daemon=True so the receiver thread does not keep
    # the process alive once main() returns.
    print(f"\n🌐 Starting webhook server at {WEBHOOK_BASE_URL}...")
    webhook_thread = Thread(target=start_webhook_server, daemon=True)
    webhook_thread.start()
    time.sleep(2)  # give the receiver a moment to start listening

    # Run demos that submit jobs, spacing them out by 5 seconds each.
    job_demos = (
        demo_1_crawl_webhook_notification_only,
        demo_2_crawl_webhook_with_data,
        demo_3_llm_webhook_notification_only,
        demo_4_llm_webhook_with_schema,
    )
    for submit_demo in job_demos:
        submit_demo()
        time.sleep(5)

    # Informational demos: they only print, so no pause is needed.
    demo_5_global_webhook_config()
    demo_6_webhook_retry_logic()

    # Wait for webhooks
    print("\n⏳ Waiting for all webhooks to arrive...")
    time.sleep(30)

    # Print summary
    print_summary()

    print("\n" + "="*70)
    print("✅ Demo completed!")
    print("="*70)
    print("\n📚 Documentation:")
    print(" • deploy/docker/WEBHOOK_EXAMPLES.md")
    print(" • docs/examples/docker_webhook_example.py")
    print("\n🔗 Upgrade:")
    print(" docker pull unclecode/crawl4ai:0.7.6")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||||
655
docs/releases_review/v0.7.5_docker_hooks_demo.py
Normal file
655
docs/releases_review/v0.7.5_docker_hooks_demo.py
Normal file
@@ -0,0 +1,655 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
🚀 Crawl4AI v0.7.5 - Docker Hooks System Complete Demonstration
|
||||||
|
================================================================
|
||||||
|
|
||||||
|
This file demonstrates the NEW Docker Hooks System introduced in v0.7.5.
|
||||||
|
|
||||||
|
The Docker Hooks System is a completely NEW feature that provides pipeline
|
||||||
|
customization through user-provided Python functions. It offers three approaches:
|
||||||
|
|
||||||
|
1. String-based hooks for REST API
|
||||||
|
2. hooks_to_string() utility to convert functions
|
||||||
|
3. Docker Client with automatic conversion (most convenient)
|
||||||
|
|
||||||
|
All three approaches are part of this NEW v0.7.5 feature!
|
||||||
|
|
||||||
|
Perfect for video recording and demonstration purposes.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Docker container running: docker run -p 11235:11235 unclecode/crawl4ai:latest
|
||||||
|
- crawl4ai v0.7.5 installed: pip install crawl4ai==0.7.5
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
# Import Crawl4AI components
|
||||||
|
from crawl4ai import hooks_to_string
|
||||||
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
# Base URL of the Crawl4AI Docker server that the demos talk to.
DOCKER_URL = "http://localhost:11235"
# Alternative local port, kept for quick switching:
# DOCKER_URL = "http://localhost:11234"

# Pages crawled by the demos; the demos select entries by index.
TEST_URLS = [
    # "https://httpbin.org/html",  # disabled alternative target
    "https://www.kidocode.com",
    "https://quotes.toscrape.com",
]
|
||||||
|
|
||||||
|
|
||||||
|
def print_section(title: str, description: str = "") -> None:
    """Print a formatted section header.

    Args:
        title: Heading shown between the two 70-character rules.
        description: Optional second line; omitted when empty.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    if description:
        print(f" {description}")
    print(rule + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def check_docker_service() -> bool:
    """Check if Docker service is running.

    Returns:
        True when ``GET {DOCKER_URL}/health`` answers with HTTP 200 within
        3 seconds; False on any other status or on a request failure
        (connection refused, timeout, invalid URL, ...).
    """
    try:
        response = requests.get(f"{DOCKER_URL}/health", timeout=3)
    except requests.exceptions.RequestException:
        # Only request-level failures mean "service down"; a bare except
        # would also swallow KeyboardInterrupt and programming errors.
        return False
    return response.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# REUSABLE HOOK LIBRARY (NEW in v0.7.5)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def performance_optimization_hook(page, context, **kwargs):
    """
    Performance Hook: Block unnecessary resources to speed up crawling

    Registers abort-routes on ``context`` for image files and for
    analytics/ads URL patterns, then returns ``page`` unchanged.
    """
    # NOTE(review): this function is passed to hooks_to_string() / the Docker
    # client, presumably as source text - keep it free of module-level names.
    print(" [Hook] 🚀 Optimizing performance - blocking images and ads...")

    blocked_patterns = (
        # Block images
        "**/*.{png,jpg,jpeg,gif,webp,svg,ico}",
        # Block ads and analytics
        "**/analytics/*",
        "**/ads/*",
        "**/google-analytics.com/*",
    )
    for pattern in blocked_patterns:
        await context.route(pattern, lambda route: route.abort())

    print(" [Hook] ✓ Performance optimization applied")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def viewport_setup_hook(page, context, **kwargs):
    """
    Viewport Hook: Set consistent viewport size for rendering

    Sets the page viewport to 1920x1080 and returns ``page``.
    ``context`` and extra keyword arguments are accepted but unused.
    """
    print(" [Hook] 🖥️ Setting viewport to 1920x1080...")
    await page.set_viewport_size({"width": 1920, "height": 1080})
    print(" [Hook] ✓ Viewport configured")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def authentication_headers_hook(page, context, url, **kwargs):
    """
    Headers Hook: Add custom authentication and tracking headers

    Logs the first 50 characters of ``url``, installs a fixed set of extra
    HTTP headers on ``page`` and returns ``page``.
    """
    print(f" [Hook] 🔐 Adding custom headers for {url[:50]}...")

    custom_headers = {
        'X-Crawl4AI-Version': '0.7.5',
        'X-Custom-Hook': 'function-based-demo',
        'Accept-Language': 'en-US,en;q=0.9',
        'User-Agent': 'Crawl4AI/0.7.5 (Educational Demo)',
    }
    await page.set_extra_http_headers(custom_headers)

    print(" [Hook] ✓ Custom headers added")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def lazy_loading_handler_hook(page, context, **kwargs):
    """
    Content Hook: Handle lazy-loaded content by scrolling

    Scrolls to the bottom, the middle and back to the top of the document,
    pausing after each scroll, then returns ``page``.
    """
    print(" [Hook] 📜 Scrolling to load lazy content...")

    # (script, pause in ms): bottom -> middle -> top
    scroll_steps = (
        ("window.scrollTo(0, document.body.scrollHeight)", 1000),
        ("window.scrollTo(0, document.body.scrollHeight / 2)", 500),
        ("window.scrollTo(0, 0)", 500),
    )
    for script, pause_ms in scroll_steps:
        await page.evaluate(script)
        await page.wait_for_timeout(pause_ms)

    print(" [Hook] ✓ Lazy content loaded")
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def page_analytics_hook(page, context, **kwargs):
    """
    Analytics Hook: Log page metrics before extraction

    Evaluates a small script in the page that counts images, links,
    scripts, h1-h3 headings and paragraphs, prints the figures (title
    truncated to 50 characters) and returns ``page``.
    """
    print(" [Hook] 📊 Collecting page analytics...")

    metrics = await page.evaluate('''
        () => ({
            title: document.title,
            images: document.images.length,
            links: document.links.length,
            scripts: document.scripts.length,
            headings: document.querySelectorAll('h1, h2, h3').length,
            paragraphs: document.querySelectorAll('p').length
        })
    ''')

    print(f" [Hook] 📈 Page: {metrics['title'][:50]}...")
    print(f" Links: {metrics['links']}, Images: {metrics['images']}, "
          f"Headings: {metrics['headings']}, Paragraphs: {metrics['paragraphs']}")

    return page
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DEMO 1: String-Based Hooks (NEW Docker Hooks System)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def demo_1_string_based_hooks():
    """
    Demonstrate string-based hooks with REST API (part of NEW Docker Hooks System)

    Builds three hooks as Python source strings, posts them together with
    the first test URL to ``{DOCKER_URL}/crawl`` and prints the crawl result
    plus the hook-execution report from the response, when present.
    Request errors are reported on stdout rather than raised.
    """
    print_section(
        "DEMO 1: String-Based Hooks (REST API)",
        "Part of the NEW Docker Hooks System - hooks as strings"
    )

    # Define hooks as strings. Each value is a complete Python function
    # definition, so the indentation inside the literals is significant.
    hooks_config = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print(" [String Hook] Setting up page context...")
    # Block images for performance
    await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
    await page.set_viewport_size({"width": 1920, "height": 1080})
    return page
""",

        "before_goto": """
async def hook(page, context, url, **kwargs):
    print(f" [String Hook] Navigating to {url[:50]}...")
    await page.set_extra_http_headers({
        'X-Crawl4AI': 'string-based-hooks',
        'X-Demo': 'v0.7.5'
    })
    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    print(" [String Hook] Scrolling page...")
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(1000)
    return page
"""
    }

    target_url = TEST_URLS[0]

    # Prepare request payload
    payload = {
        "urls": [target_url],
        "hooks": {
            "code": hooks_config,
            "timeout": 30
        },
        "crawler_config": {
            "cache_mode": "bypass"
        }
    }

    print(f"🎯 Target URL: {target_url}")
    print(f"🔧 Configured {len(hooks_config)} string-based hooks")
    print("📡 Sending request to Docker API...\n")

    try:
        start_time = time.time()
        response = requests.post(f"{DOCKER_URL}/crawl", json=payload, timeout=60)
        execution_time = time.time() - start_time

        if response.status_code == 200:
            result = response.json()

            print(f"\n✅ Request successful! (took {execution_time:.2f}s)")

            # Display results
            if result.get('results') and result['results'][0].get('success'):
                crawl_result = result['results'][0]
                # "or ''" guards against an explicit null in the JSON, for
                # which .get(key, '') would still return None.
                html_length = len(crawl_result.get('html') or '')
                markdown_length = len(crawl_result.get('markdown') or '')

                print("\n📊 Results:")
                print(f" • HTML length: {html_length:,} characters")
                print(f" • Markdown length: {markdown_length:,} characters")
                print(f" • URL: {crawl_result.get('url')}")

                # Check hooks execution
                if 'hooks' in result:
                    hooks_info = result['hooks']
                    print("\n🎣 Hooks Execution:")
                    print(f" • Status: {hooks_info['status']['status']}")
                    print(f" • Attached hooks: {len(hooks_info['status']['attached_hooks'])}")

                    if 'summary' in hooks_info:
                        summary = hooks_info['summary']
                        print(f" • Total executions: {summary['total_executions']}")
                        print(f" • Successful: {summary['successful']}")
                        print(f" • Success rate: {summary['success_rate']:.1f}%")
            else:
                print("⚠️ Crawl completed but no results")

        else:
            print(f"❌ Request failed with status {response.status_code}")
            print(f" Error: {response.text[:200]}")

    except requests.exceptions.Timeout:
        print("⏰ Request timed out after 60 seconds")
    except Exception as e:
        # Demo boundary: report any other failure and let later demos run.
        print(f"❌ Error: {str(e)}")

    print("\n" + "─" * 70)
    print("✓ String-based hooks demo complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DEMO 2: Function-Based Hooks with hooks_to_string() Utility
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def demo_2_hooks_to_string_utility():
    """
    Demonstrate the new hooks_to_string() utility for converting functions

    Converts three functions from the reusable hook library with
    ``hooks_to_string()``, previews the first converted string, then posts
    the converted hooks with the first test URL to ``{DOCKER_URL}/crawl``.
    Request errors are reported on stdout rather than raised.
    """
    print_section(
        "DEMO 2: hooks_to_string() Utility (NEW! ✨)",
        "Convert Python functions to strings for REST API"
    )

    print("📦 Creating hook functions...")
    print(" • performance_optimization_hook")
    print(" • viewport_setup_hook")
    print(" • authentication_headers_hook")
    print(" • lazy_loading_handler_hook")

    # Convert function objects to strings using the NEW utility
    print("\n🔄 Converting functions to strings with hooks_to_string()...")

    hooks_dict = {
        "on_page_context_created": performance_optimization_hook,
        "before_goto": authentication_headers_hook,
        "before_retrieve_html": lazy_loading_handler_hook,
    }

    hooks_as_strings = hooks_to_string(hooks_dict)

    print(f"✅ Successfully converted {len(hooks_as_strings)} functions to strings")

    # Show a preview
    print("\n📝 Sample converted hook (first 250 characters):")
    print("─" * 70)
    # next(..., "") avoids an IndexError if the conversion returned nothing.
    sample_hook = next(iter(hooks_as_strings.values()), "")
    print(sample_hook[:250] + "...")
    print("─" * 70)

    # Use the converted hooks with REST API
    print("\n📡 Using converted hooks with REST API...")

    payload = {
        "urls": [TEST_URLS[0]],
        "hooks": {
            "code": hooks_as_strings,
            "timeout": 30
        }
    }

    try:
        start_time = time.time()
        response = requests.post(f"{DOCKER_URL}/crawl", json=payload, timeout=60)
        execution_time = time.time() - start_time

        if response.status_code == 200:
            result = response.json()
            print(f"\n✅ Request successful! (took {execution_time:.2f}s)")

            if result.get('results') and result['results'][0].get('success'):
                crawl_result = result['results'][0]
                print(f" • HTML length: {len(crawl_result.get('html') or ''):,} characters")
                print(" • Hooks executed successfully!")
        else:
            print(f"❌ Request failed: {response.status_code}")

    except Exception as e:
        # Demo boundary: report the failure and let later demos run.
        print(f"❌ Error: {str(e)}")

    print("\n💡 Benefits of hooks_to_string():")
    print(" ✓ Write hooks as regular Python functions")
    print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
    print(" ✓ Type checking and linting")
    print(" ✓ Easy to test and debug")
    print(" ✓ Reusable across projects")
    print(" ✓ Works with any REST API client")

    print("\n" + "─" * 70)
    print("✓ hooks_to_string() utility demo complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DEMO 3: Docker Client with Automatic Conversion (RECOMMENDED! 🌟)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def demo_3_docker_client_auto_conversion():
    """
    Demonstrate Docker Client with automatic hook conversion (RECOMMENDED)

    Creates a ``Crawl4aiDockerClient``, crawls one test URL while passing
    hook functions directly as objects, and prints the result's size,
    metadata and link counts. Errors are printed with a traceback rather
    than raised.
    """
    print_section(
        "DEMO 3: Docker Client with Auto-Conversion (RECOMMENDED! 🌟)",
        "Pass function objects directly - conversion happens automatically!"
    )

    print("🐳 Initializing Crawl4AI Docker Client...")
    client = Crawl4aiDockerClient(base_url=DOCKER_URL)

    print("✅ Client ready!\n")

    # Use our reusable hook library - just pass the function objects!
    print("📚 Using reusable hook library:")
    print(" • performance_optimization_hook")
    print(" • viewport_setup_hook")
    print(" • authentication_headers_hook")
    print(" • lazy_loading_handler_hook")
    print(" • page_analytics_hook")

    # One variable feeds both the banner and the crawl. The banner used to
    # name TEST_URLS[1] while TEST_URLS[0] was crawled; the crawled URL is
    # kept and the banner now reports it.
    target_url = TEST_URLS[0]

    print("\n🎯 Target URL: " + target_url)
    print("🚀 Starting crawl with automatic hook conversion...\n")

    try:
        start_time = time.time()

        # Pass function objects directly - NO manual conversion needed! ✨
        results = await client.crawl(
            urls=[target_url],
            hooks={
                "on_page_context_created": performance_optimization_hook,
                "before_goto": authentication_headers_hook,
                "before_retrieve_html": lazy_loading_handler_hook,
                "before_return_html": page_analytics_hook,
            },
            hooks_timeout=30
        )

        execution_time = time.time() - start_time

        print(f"\n✅ Crawl completed! (took {execution_time:.2f}s)\n")

        # Display results
        if results and results.success:
            result = results
            print("📊 Results:")
            print(f" • URL: {result.url}")
            print(f" • Success: {result.success}")
            print(f" • HTML length: {len(result.html):,} characters")
            print(f" • Markdown length: {len(result.markdown):,} characters")

            # Show metadata
            if result.metadata:
                print("\n📋 Metadata:")
                print(f" • Title: {result.metadata.get('title', 'N/A')}")
                print(f" • Description: {result.metadata.get('description', 'N/A')}")

            # Show links
            if result.links:
                internal_count = len(result.links.get('internal', []))
                external_count = len(result.links.get('external', []))
                print("\n🔗 Links Found:")
                print(f" • Internal: {internal_count}")
                print(f" • External: {external_count}")
        else:
            print("⚠️ Crawl completed but no successful results")
            if results:
                print(f" Error: {results.error_message}")

    except Exception as e:
        # Demo boundary: show the failure in full but keep the demo going.
        print(f"❌ Error: {str(e)}")
        import traceback
        traceback.print_exc()

    print("\n🌟 Why Docker Client is RECOMMENDED:")
    print(" ✓ Automatic function-to-string conversion")
    print(" ✓ No manual hooks_to_string() calls needed")
    print(" ✓ Cleaner, more Pythonic code")
    print(" ✓ Full type hints and IDE support")
    print(" ✓ Built-in error handling")
    print(" ✓ Async/await support")

    print("\n" + "─" * 70)
    print("✓ Docker Client auto-conversion demo complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DEMO 4: Advanced Use Case - Complete Hook Pipeline
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def demo_4_complete_hook_pipeline():
    """
    Demonstrate a complete hook pipeline using all 8 hook points

    Defines one small logging hook per hook point, crawls the first test
    URL through ``Crawl4aiDockerClient`` with all eight attached, and prints
    the outcome followed by a reference list of the hook points. Errors are
    reported on stdout rather than raised.
    """
    print_section(
        "DEMO 4: Complete Hook Pipeline",
        "Using all 8 available hook points for comprehensive control"
    )

    # Define all 8 hooks
    async def on_browser_created_hook(browser, **kwargs):
        """Hook 1: Called after browser is created"""
        print(" [Pipeline] 1/8 Browser created")
        return browser

    async def on_page_context_created_hook(page, context, **kwargs):
        """Hook 2: Called after page context is created"""
        print(" [Pipeline] 2/8 Page context created - setting up...")
        await page.set_viewport_size({"width": 1920, "height": 1080})
        return page

    async def on_user_agent_updated_hook(page, context, user_agent, **kwargs):
        """Hook 3: Called when user agent is updated"""
        print(f" [Pipeline] 3/8 User agent updated: {user_agent[:50]}...")
        return page

    async def before_goto_hook(page, context, url, **kwargs):
        """Hook 4: Called before navigating to URL"""
        print(f" [Pipeline] 4/8 Before navigation to: {url[:60]}...")
        return page

    async def after_goto_hook(page, context, url, response, **kwargs):
        """Hook 5: Called after navigation completes"""
        print(f" [Pipeline] 5/8 After navigation - Status: {response.status if response else 'N/A'}")
        await page.wait_for_timeout(1000)
        return page

    async def on_execution_started_hook(page, context, **kwargs):
        """Hook 6: Called when JavaScript execution starts"""
        print(" [Pipeline] 6/8 JavaScript execution started")
        return page

    async def before_retrieve_html_hook(page, context, **kwargs):
        """Hook 7: Called before retrieving HTML"""
        print(" [Pipeline] 7/8 Before HTML retrieval - scrolling...")
        await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        return page

    async def before_return_html_hook(page, context, html, **kwargs):
        """Hook 8: Called before returning HTML"""
        print(f" [Pipeline] 8/8 Before return - HTML length: {len(html):,} chars")
        return page

    # Bound once so the banner and the crawl cannot drift apart.
    target_url = TEST_URLS[0]

    print("🎯 Target URL: " + target_url)
    print("🔧 Configured ALL 8 hook points for complete pipeline control\n")

    client = Crawl4aiDockerClient(base_url=DOCKER_URL)

    try:
        print("🚀 Starting complete pipeline crawl...\n")
        start_time = time.time()

        results = await client.crawl(
            urls=[target_url],
            hooks={
                "on_browser_created": on_browser_created_hook,
                "on_page_context_created": on_page_context_created_hook,
                "on_user_agent_updated": on_user_agent_updated_hook,
                "before_goto": before_goto_hook,
                "after_goto": after_goto_hook,
                "on_execution_started": on_execution_started_hook,
                "before_retrieve_html": before_retrieve_html_hook,
                "before_return_html": before_return_html_hook,
            },
            hooks_timeout=45
        )

        execution_time = time.time() - start_time

        if results and results.success:
            print(f"\n✅ Complete pipeline executed successfully! (took {execution_time:.2f}s)")
            print(" • All 8 hooks executed in sequence")
            print(f" • HTML length: {len(results.html):,} characters")
        else:
            print("⚠️ Pipeline completed with warnings")

    except Exception as e:
        # Demo boundary: report the failure and continue to the reference list.
        print(f"❌ Error: {str(e)}")

    print("\n📚 Available Hook Points:")
    print(" 1. on_browser_created - Browser initialization")
    print(" 2. on_page_context_created - Page context setup")
    print(" 3. on_user_agent_updated - User agent configuration")
    print(" 4. before_goto - Pre-navigation setup")
    print(" 5. after_goto - Post-navigation processing")
    print(" 6. on_execution_started - JavaScript execution start")
    print(" 7. before_retrieve_html - Pre-extraction processing")
    print(" 8. before_return_html - Final HTML processing")

    print("\n" + "─" * 70)
    print("✓ Complete hook pipeline demo complete\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# MAIN EXECUTION
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
async def main():
    """
    Run all demonstrations

    Verifies that the Docker service answers its health check, then runs
    each enabled demo in order (awaiting the async ones). A failing demo is
    reported with a traceback and the run continues; Ctrl+C stops the loop.
    A static feature summary is printed at the end.
    """
    print("\n" + "=" * 70)
    print(" 🚀 Crawl4AI v0.7.5 - Docker Hooks Complete Demonstration")
    print("=" * 70)

    # Check Docker service
    print("\n🔍 Checking Docker service status...")
    if not check_docker_service():
        print("❌ Docker service is not running!")
        print("\n📋 To start the Docker service:")
        print(" docker run -p 11235:11235 unclecode/crawl4ai:latest")
        print("\nPlease start the service and run this demo again.")
        return

    print("✅ Docker service is running!\n")

    # Run all demos: (display name, callable, needs await)
    demos = [
        ("String-Based Hooks (REST API)", demo_1_string_based_hooks, False),
        ("hooks_to_string() Utility", demo_2_hooks_to_string_utility, False),
        ("Docker Client Auto-Conversion", demo_3_docker_client_auto_conversion, True),
        # Demo 4 is currently disabled:
        # ("Complete Hook Pipeline", demo_4_complete_hook_pipeline, True),
    ]

    for i, (name, demo_func, is_async) in enumerate(demos, 1):
        print(f"\n{'🔷' * 35}")
        print(f"Starting Demo {i}/{len(demos)}: {name}")
        print(f"{'🔷' * 35}\n")

        try:
            if is_async:
                await demo_func()
            else:
                demo_func()

            print(f"✅ Demo {i} completed successfully!")

            # Pause between demos (except the last one)
            if i < len(demos):
                print("\n⏸️ Press Enter to continue to next demo...")
                # NOTE(review): the blocking wait is disabled, so the prompt
                # above is printed without pausing. Re-enable to pause:
                # input()

        except KeyboardInterrupt:
            print("\n⏹️ Demo interrupted by user")
            break
        except Exception as e:
            # Keep going: one failing demo must not cancel the rest.
            print(f"\n❌ Demo {i} failed: {str(e)}")
            import traceback
            traceback.print_exc()
            print("\nContinuing to next demo...\n")

    # Final summary
    print("\n" + "=" * 70)
    print(" 🎉 All Demonstrations Complete!")
    print("=" * 70)

    print("\n📊 Summary of v0.7.5 Docker Hooks System:")
    print("\n🆕 COMPLETELY NEW FEATURE in v0.7.5:")
    print(" The Docker Hooks System lets you customize the crawling pipeline")
    print(" with user-provided Python functions at 8 strategic points.")

    print("\n✨ Three Ways to Use Docker Hooks (All NEW!):")
    print(" 1. String-based - Write hooks as strings for REST API")
    print(" 2. hooks_to_string() - Convert Python functions to strings")
    print(" 3. Docker Client - Automatic conversion (RECOMMENDED)")

    print("\n💡 Key Benefits:")
    print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
    print(" ✓ Type checking and linting")
    print(" ✓ Easy to test and debug")
    print(" ✓ Reusable across projects")
    print(" ✓ Complete pipeline control")

    print("\n🎯 8 Hook Points Available:")
    print(" • on_browser_created, on_page_context_created")
    print(" • on_user_agent_updated, before_goto, after_goto")
    print(" • on_execution_started, before_retrieve_html, before_return_html")

    print("\n📚 Resources:")
    print(" • Docs: https://docs.crawl4ai.com")
    print(" • GitHub: https://github.com/unclecode/crawl4ai")
    print(" • Discord: https://discord.gg/jP8KfhDhyN")

    print("\n" + "=" * 70)
    print(" Happy Crawling with v0.7.5! 🕷️")
    print("=" * 70 + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the async demo driver only when executed directly.
if __name__ == "__main__":
    print("\n🎬 Starting Crawl4AI v0.7.5 Docker Hooks Demonstration...")
    print("Press Ctrl+C anytime to exit\n")

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to leave the demo; exit quietly.
        print("\n\n👋 Demo stopped by user. Thanks for exploring Crawl4AI v0.7.5!")
    except Exception as e:
        # Last-resort boundary: show what went wrong instead of a bare crash.
        print(f"\n\n❌ Demo error: {str(e)}")
        import traceback
        traceback.print_exc()
|
||||||
1516
docs/releases_review/v0.7.5_video_walkthrough.ipynb
Normal file
1516
docs/releases_review/v0.7.5_video_walkthrough.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
12
mkdocs.yml
12
mkdocs.yml
@@ -1,5 +1,4 @@
|
|||||||
site_name: Crawl4AI Documentation (v0.7.x)
|
site_name: Crawl4AI Documentation (v0.7.x)
|
||||||
site_favicon: docs/md_v2/favicon.ico
|
|
||||||
site_description: 🚀🤖 Crawl4AI, Open-source LLM-Friendly Web Crawler & Scraper
|
site_description: 🚀🤖 Crawl4AI, Open-source LLM-Friendly Web Crawler & Scraper
|
||||||
site_url: https://docs.crawl4ai.com
|
site_url: https://docs.crawl4ai.com
|
||||||
repo_url: https://github.com/unclecode/crawl4ai
|
repo_url: https://github.com/unclecode/crawl4ai
|
||||||
@@ -8,6 +7,7 @@ docs_dir: docs/md_v2
|
|||||||
|
|
||||||
nav:
|
nav:
|
||||||
- Home: 'index.md'
|
- Home: 'index.md'
|
||||||
|
- "📚 Complete SDK Reference": "complete-sdk-reference.md"
|
||||||
- "Ask AI": "core/ask-ai.md"
|
- "Ask AI": "core/ask-ai.md"
|
||||||
- "Quick Start": "core/quickstart.md"
|
- "Quick Start": "core/quickstart.md"
|
||||||
- "Code Examples": "core/examples.md"
|
- "Code Examples": "core/examples.md"
|
||||||
@@ -15,6 +15,8 @@ nav:
|
|||||||
- "Demo Apps": "apps/index.md"
|
- "Demo Apps": "apps/index.md"
|
||||||
- "C4A-Script Editor": "apps/c4a-script/index.html"
|
- "C4A-Script Editor": "apps/c4a-script/index.html"
|
||||||
- "LLM Context Builder": "apps/llmtxt/index.html"
|
- "LLM Context Builder": "apps/llmtxt/index.html"
|
||||||
|
- "Marketplace": "marketplace/index.html"
|
||||||
|
- "Marketplace Admin": "marketplace/admin/index.html"
|
||||||
- Setup & Installation:
|
- Setup & Installation:
|
||||||
- "Installation": "core/installation.md"
|
- "Installation": "core/installation.md"
|
||||||
- "Docker Deployment": "core/docker-deployment.md"
|
- "Docker Deployment": "core/docker-deployment.md"
|
||||||
@@ -66,10 +68,12 @@ nav:
|
|||||||
- "CrawlResult": "api/crawl-result.md"
|
- "CrawlResult": "api/crawl-result.md"
|
||||||
- "Strategies": "api/strategies.md"
|
- "Strategies": "api/strategies.md"
|
||||||
- "C4A-Script Reference": "api/c4a-script-reference.md"
|
- "C4A-Script Reference": "api/c4a-script-reference.md"
|
||||||
|
- "Brand Book": "branding/index.md"
|
||||||
|
|
||||||
theme:
|
theme:
|
||||||
name: 'terminal'
|
name: 'terminal'
|
||||||
palette: 'dark'
|
palette: 'dark'
|
||||||
|
favicon: favicon.ico
|
||||||
custom_dir: docs/md_v2/overrides
|
custom_dir: docs/md_v2/overrides
|
||||||
color_mode: 'dark'
|
color_mode: 'dark'
|
||||||
icon:
|
icon:
|
||||||
@@ -98,6 +102,7 @@ extra_css:
|
|||||||
- assets/highlight.css
|
- assets/highlight.css
|
||||||
- assets/dmvendor.css
|
- assets/dmvendor.css
|
||||||
- assets/feedback-overrides.css
|
- assets/feedback-overrides.css
|
||||||
|
- assets/page_actions.css
|
||||||
|
|
||||||
extra_javascript:
|
extra_javascript:
|
||||||
- https://www.googletagmanager.com/gtag/js?id=G-58W0K2ZQ25
|
- https://www.googletagmanager.com/gtag/js?id=G-58W0K2ZQ25
|
||||||
@@ -106,8 +111,9 @@ extra_javascript:
|
|||||||
- assets/highlight_init.js
|
- assets/highlight_init.js
|
||||||
- https://buttons.github.io/buttons.js
|
- https://buttons.github.io/buttons.js
|
||||||
- assets/toc.js
|
- assets/toc.js
|
||||||
- assets/github_stats.js
|
- assets/github_stats.js
|
||||||
- assets/selection_ask_ai.js
|
- assets/selection_ask_ai.js
|
||||||
- assets/copy_code.js
|
- assets/copy_code.js
|
||||||
- assets/floating_ask_ai_button.js
|
- assets/floating_ask_ai_button.js
|
||||||
- assets/mobile_menu.js
|
- assets/mobile_menu.js
|
||||||
|
- assets/page_actions.js?v=20251006
|
||||||
401
test_llm_webhook_feature.py
Normal file
401
test_llm_webhook_feature.py
Normal file
@@ -0,0 +1,401 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script to validate webhook implementation for /llm/job endpoint.
|
||||||
|
|
||||||
|
This tests that the /llm/job endpoint now supports webhooks
|
||||||
|
following the same pattern as /crawl/job.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Add deploy/docker to path
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'deploy', 'docker'))
|
||||||
|
|
||||||
|
def test_llm_job_payload_model():
    """Check that ``LlmJobPayload`` accepts an optional ``webhook_config``.

    Builds one payload that carries a webhook configuration and one that
    omits it, printing progress along the way.

    Returns:
        bool: True when both payloads are constructed and the omitted
        ``webhook_config`` comes back as None; False if anything raises
        (the traceback is printed).
    """
    rule = "=" * 60
    print(rule)
    print("TEST 1: LlmJobPayload Model")
    print(rule)

    try:
        from job import LlmJobPayload
        from schemas import WebhookConfig
        from pydantic import ValidationError

        # Full payload, including the nested webhook settings.
        webhook_settings = {
            "webhook_url": "https://myapp.com/webhook",
            "webhook_data_in_payload": True,
            "webhook_headers": {"X-Secret": "token"},
        }
        full_fields = {
            "url": "https://example.com",
            "q": "Extract main content",
            "schema": None,
            "cache": False,
            "provider": None,
            "webhook_config": webhook_settings,
        }
        with_webhook = LlmJobPayload(**full_fields)

        print("✅ LlmJobPayload accepts webhook_config")
        print(f" - URL: {with_webhook.url}")
        print(f" - Query: {with_webhook.q}")
        print(f" - Webhook URL: {with_webhook.webhook_config.webhook_url}")
        print(f" - Data in payload: {with_webhook.webhook_config.webhook_data_in_payload}")

        # Only the two fields this check supplies; webhook_config must default to None.
        without_webhook = LlmJobPayload(**{
            "url": "https://example.com",
            "q": "Extract content",
        })
        assert without_webhook.webhook_config is None, "webhook_config should be optional"
        print("✅ LlmJobPayload works without webhook_config (optional)")

        return True
    except Exception as err:
        print(f"❌ Failed: {err}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_handle_llm_request_signature():
    """Check that ``api.handle_llm_request`` exposes a ``webhook_config`` parameter.

    The parameter's default is inspected and reported:

    * default ``None``  -> reported as optional,
    * no default at all -> reported as *required* (warning),
    * any other default -> reported as a warning with the value.

    Returns:
        bool: True when the parameter exists (regardless of its default),
        False when it is missing or the import/inspection raises.
    """
    print("\n" + "=" * 60)
    print("TEST 2: handle_llm_request Function Signature")
    print("=" * 60)

    try:
        from api import handle_llm_request
        import inspect

        sig = inspect.signature(handle_llm_request)
        params = list(sig.parameters.keys())

        print(f"Function parameters: {params}")

        if 'webhook_config' not in params:
            print("❌ handle_llm_request missing webhook_config parameter")
            return False

        print("✅ handle_llm_request has webhook_config parameter")

        default = sig.parameters['webhook_config'].default
        if default is None:
            print(f"✅ webhook_config is optional (default: {default})")
        elif default is inspect.Parameter.empty:
            # Parameter.empty means "no default", i.e. callers MUST pass it,
            # so it must not be reported as optional.
            print("⚠️ webhook_config is required (no default value)")
        else:
            print(f"⚠️ webhook_config default is: {default}")

        return True

    except Exception as e:
        print(f"❌ Failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_process_llm_extraction_signature():
    """Check that ``api.process_llm_extraction`` exposes a ``webhook_config`` parameter.

    The parameter's default is inspected and reported:

    * default ``None``  -> reported as optional,
    * no default at all -> reported as *required* (warning),
    * any other default -> reported as a warning with the value.

    Returns:
        bool: True when the parameter exists (regardless of its default),
        False when it is missing or the import/inspection raises.
    """
    print("\n" + "=" * 60)
    print("TEST 3: process_llm_extraction Function Signature")
    print("=" * 60)

    try:
        from api import process_llm_extraction
        import inspect

        sig = inspect.signature(process_llm_extraction)
        params = list(sig.parameters.keys())

        print(f"Function parameters: {params}")

        if 'webhook_config' not in params:
            print("❌ process_llm_extraction missing webhook_config parameter")
            return False

        print("✅ process_llm_extraction has webhook_config parameter")

        default = sig.parameters['webhook_config'].default
        if default is None:
            print(f"✅ webhook_config is optional (default: {default})")
        elif default is inspect.Parameter.empty:
            # Parameter.empty means "no default", i.e. callers MUST pass it,
            # so it must not be reported as optional.
            print("⚠️ webhook_config is required (no default value)")
        else:
            print(f"⚠️ webhook_config default is: {default}")

        return True

    except Exception as e:
        print(f"❌ Failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_webhook_integration_in_api():
    """Statically check that api.py wires webhook notifications into process_llm_extraction.

    Reads ``deploy/docker/api.py`` (relative to this file) as text and looks
    for the service initialisation, the ``llm_extraction`` task type, and the
    notification calls inside ``process_llm_extraction``.

    Returns:
        bool: False when the file cannot be read, a required snippet is
        missing, or ``process_llm_extraction`` is not defined in the file;
        True otherwise. Fewer than three notification calls only produces a
        warning.
    """
    print("\n" + "=" * 60)
    print("TEST 4: Webhook Integration in process_llm_extraction")
    print("=" * 60)

    try:
        api_file = os.path.join(os.path.dirname(__file__), 'deploy', 'docker', 'api.py')

        # Decode explicitly; the platform default encoding is not always UTF-8.
        with open(api_file, 'r', encoding='utf-8') as f:
            api_content = f.read()

        # Check for WebhookDeliveryService initialization
        if 'webhook_service = WebhookDeliveryService(config)' in api_content:
            print("✅ process_llm_extraction initializes WebhookDeliveryService")
        else:
            print("❌ Missing WebhookDeliveryService initialization in process_llm_extraction")
            return False

        # Check for notify_job_completion calls with llm_extraction
        if 'task_type="llm_extraction"' in api_content:
            print("✅ Uses correct task_type='llm_extraction' for notifications")
        else:
            print("❌ Missing task_type='llm_extraction' in webhook notifications")
            return False

        # Restrict the notification count to the body of process_llm_extraction.
        llm_func_start = api_content.find('async def process_llm_extraction')
        if llm_func_start == -1:
            # Without this guard the -1 would be used as a slice index and the
            # check would "pass" against an unrelated fragment of the file.
            print("❌ Could not find process_llm_extraction in api.py")
            return False

        llm_func_end = api_content.find('\nasync def ', llm_func_start + 1)
        if llm_func_end == -1:
            llm_func_end = len(api_content)

        llm_func_content = api_content[llm_func_start:llm_func_end]
        llm_notification_count = llm_func_content.count('await webhook_service.notify_job_completion')

        print(f"✅ Found {llm_notification_count} webhook notification calls in process_llm_extraction")

        # Expected at least 3: one success path plus two failure paths.
        if llm_notification_count >= 3:
            print("✅ Sufficient notification points (success + failure paths)")
        else:
            print(f"⚠️ Expected at least 3 notification calls, found {llm_notification_count}")

        return True
    except Exception as e:
        print(f"❌ Failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_job_endpoint_integration():
    """Statically check that the /llm/job handler extracts and forwards webhook_config.

    Reads ``deploy/docker/job.py`` (relative to this file) as text, isolates
    the ``llm_job_enqueue`` coroutine and looks for the snippets that
    initialise, test, convert and pass on ``webhook_config``.

    Returns:
        bool: True when every snippet is present in ``llm_job_enqueue``;
        False when the file cannot be read, the function is missing, or any
        snippet is absent.
    """
    print("\n" + "=" * 60)
    print("TEST 5: /llm/job Endpoint Integration")
    print("=" * 60)

    try:
        job_file = os.path.join(os.path.dirname(__file__), 'deploy', 'docker', 'job.py')

        # Decode explicitly; the platform default encoding is not always UTF-8.
        with open(job_file, 'r', encoding='utf-8') as f:
            job_content = f.read()

        # Find the llm_job_enqueue function
        llm_job_start = job_content.find('async def llm_job_enqueue')
        if llm_job_start == -1:
            print("❌ Could not find llm_job_enqueue in job.py")
            return False

        # The function ends at the next decorated route or the next coroutine.
        # If it is the last definition in the file, it runs to the end of the
        # file. A leftover -1 here would silently chop off the final character.
        llm_job_end = job_content.find('\n\n@router', llm_job_start + 1)
        if llm_job_end == -1:
            llm_job_end = job_content.find('\n\nasync def', llm_job_start + 1)
        if llm_job_end == -1:
            llm_job_end = len(job_content)

        llm_job_func = job_content[llm_job_start:llm_job_end]

        # (snippet to look for, message when found, message when missing)
        checks = [
            ('webhook_config = None',
             "✅ llm_job_enqueue initializes webhook_config variable",
             "❌ Missing webhook_config initialization"),
            ('if payload.webhook_config:',
             "✅ llm_job_enqueue checks for payload.webhook_config",
             "❌ Missing webhook_config check"),
            ('webhook_config = payload.webhook_config.model_dump(mode=\'json\')',
             "✅ llm_job_enqueue converts webhook_config to dict",
             "❌ Missing webhook_config.model_dump conversion"),
            ('webhook_config=webhook_config',
             "✅ llm_job_enqueue passes webhook_config to handle_llm_request",
             "❌ Missing webhook_config parameter in handle_llm_request call"),
        ]
        for snippet, found_msg, missing_msg in checks:
            if snippet not in llm_job_func:
                print(missing_msg)
                return False
            print(found_msg)

        return True
    except Exception as e:
        print(f"❌ Failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_create_new_task_integration():
    """Statically check that create_new_task stores webhook_config in the task data.

    Reads ``deploy/docker/api.py`` (relative to this file) as text, isolates
    the ``create_new_task`` coroutine and looks for the snippets that test
    for and store ``webhook_config``.

    Returns:
        bool: True when the check and the storage snippet are both present;
        False when the file cannot be read, the function is missing, or a
        snippet is absent. Failing to see the hand-off to the background
        task only produces a warning.
    """
    print("\n" + "=" * 60)
    print("TEST 6: create_new_task Webhook Storage")
    print("=" * 60)

    try:
        api_file = os.path.join(os.path.dirname(__file__), 'deploy', 'docker', 'api.py')

        # Decode explicitly; the platform default encoding is not always UTF-8.
        with open(api_file, 'r', encoding='utf-8') as f:
            api_content = f.read()

        # Find create_new_task function
        create_task_start = api_content.find('async def create_new_task')
        if create_task_start == -1:
            # Report the real problem instead of a misleading "missing check".
            print("❌ Could not find create_new_task in api.py")
            return False

        create_task_end = api_content.find('\nasync def ', create_task_start + 1)
        if create_task_end == -1:
            create_task_end = len(api_content)

        create_task_func = api_content[create_task_start:create_task_end]

        # Check for webhook_config storage
        if 'if webhook_config:' in create_task_func:
            print("✅ create_new_task checks for webhook_config")
        else:
            print("❌ Missing webhook_config check in create_new_task")
            return False

        if 'task_data["webhook_config"] = json.dumps(webhook_config)' in create_task_func:
            print("✅ create_new_task stores webhook_config in Redis task data")
        else:
            print("❌ Missing webhook_config storage in task_data")
            return False

        # Heuristic only: both names appear somewhere in the function body.
        if 'webhook_config' in create_task_func and 'background_tasks.add_task' in create_task_func:
            print("✅ create_new_task passes webhook_config to background task")
        else:
            print("⚠️ Could not verify webhook_config passed to background task")

        return True
    except Exception as e:
        print(f"❌ Failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_pattern_consistency():
    """Statically check that process_llm_extraction mirrors handle_crawl_job's webhook usage.

    Reads ``deploy/docker/api.py`` (relative to this file) as text, isolates
    both coroutines and verifies that each one initialises the webhook
    service and contains the success and failure notification snippets.

    Returns:
        bool: True when both functions contain all three patterns; False
        when the file cannot be read, either function is missing, or the
        patterns differ.
    """
    print("\n" + "=" * 60)
    print("TEST 7: Pattern Consistency with /crawl/job")
    print("=" * 60)

    try:
        api_file = os.path.join(os.path.dirname(__file__), 'deploy', 'docker', 'api.py')

        # Decode explicitly; the platform default encoding is not always UTF-8.
        with open(api_file, 'r', encoding='utf-8') as f:
            api_content = f.read()

        def _async_def_source(name):
            """Return the text of coroutine *name* up to the next top-level coroutine, or None."""
            start = api_content.find('async def ' + name)
            if start == -1:
                return None
            end = api_content.find('\nasync def ', start + 1)
            if end == -1:
                end = len(api_content)
            return api_content[start:end]

        # Find handle_crawl_job to compare pattern
        crawl_job_func = _async_def_source('handle_crawl_job')
        # Find process_llm_extraction
        llm_extract_func = _async_def_source('process_llm_extraction')

        if crawl_job_func is None or llm_extract_func is None:
            # A missing function can never match; say so explicitly.
            print(
                "❌ Could not find both functions in api.py "
                f"(handle_crawl_job: {crawl_job_func is not None}, "
                f"process_llm_extraction: {llm_extract_func is not None})"
            )
            return False

        print("Checking pattern consistency...")

        # Both should initialize WebhookDeliveryService
        crawl_has_service = 'webhook_service = WebhookDeliveryService(config)' in crawl_job_func
        llm_has_service = 'webhook_service = WebhookDeliveryService(config)' in llm_extract_func

        if crawl_has_service and llm_has_service:
            print("✅ Both initialize WebhookDeliveryService")
        else:
            print(f"❌ Service initialization mismatch (crawl: {crawl_has_service}, llm: {llm_has_service})")
            return False

        # Both should call notify_job_completion on success
        crawl_notifies_success = 'status="completed"' in crawl_job_func and 'notify_job_completion' in crawl_job_func
        llm_notifies_success = 'status="completed"' in llm_extract_func and 'notify_job_completion' in llm_extract_func

        if crawl_notifies_success and llm_notifies_success:
            print("✅ Both notify on success")
        else:
            print(f"❌ Success notification mismatch (crawl: {crawl_notifies_success}, llm: {llm_notifies_success})")
            return False

        # Both should call notify_job_completion on failure
        crawl_notifies_failure = 'status="failed"' in crawl_job_func and 'error=' in crawl_job_func
        llm_notifies_failure = 'status="failed"' in llm_extract_func and 'error=' in llm_extract_func

        if crawl_notifies_failure and llm_notifies_failure:
            print("✅ Both notify on failure")
        else:
            print(f"❌ Failure notification mismatch (crawl: {crawl_notifies_failure}, llm: {llm_notifies_failure})")
            return False

        print("✅ /llm/job follows the same pattern as /crawl/job")
        return True

    except Exception as e:
        print(f"❌ Failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def main():
    """Run every /llm/job webhook check in order and print a pass/fail summary.

    Returns:
        int: 0 when every check passed, 1 otherwise (usable as an exit status).
    """
    rule = "=" * 60

    print("\n🧪 LLM Job Webhook Feature Validation")
    print(rule)
    print("Testing that /llm/job now supports webhooks like /crawl/job")
    print(rule + "\n")

    # (label shown in the summary, check to run), executed top to bottom.
    checks = (
        ("LlmJobPayload Model", test_llm_job_payload_model),
        ("handle_llm_request Signature", test_handle_llm_request_signature),
        ("process_llm_extraction Signature", test_process_llm_extraction_signature),
        ("Webhook Integration", test_webhook_integration_in_api),
        ("/llm/job Endpoint", test_job_endpoint_integration),
        ("create_new_task Storage", test_create_new_task_integration),
        ("Pattern Consistency", test_pattern_consistency),
    )
    results = []
    for label, check in checks:
        results.append((label, check()))

    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)

    total = len(results)
    passed = len([outcome for _, outcome in results if outcome])

    for label, outcome in results:
        verdict = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{verdict} - {label}")

    print("\n" + rule)
    print(f"Results: {passed}/{total} tests passed")
    print(rule)

    if passed != total:
        print(f"\n⚠️ {total - passed} test(s) failed. Please review the output above.")
        return 1

    print("\n🎉 All tests passed! /llm/job webhook feature is correctly implemented.")
    print("\n📝 Summary of changes:")
    for line in (
        " 1. LlmJobPayload model includes webhook_config field",
        " 2. /llm/job endpoint extracts and passes webhook_config",
        " 3. handle_llm_request accepts webhook_config parameter",
        " 4. create_new_task stores webhook_config in Redis",
        " 5. process_llm_extraction sends webhook notifications",
        " 6. Follows the same pattern as /crawl/job",
    ):
        print(line)
    return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Propagate main()'s 0/1 result as the process exit status. sys.exit is
    # used because the bare exit() helper is only injected by the site module
    # and is not guaranteed to exist (e.g. under ``python -S``).
    sys.exit(main())
|
||||||
307
test_webhook_implementation.py
Normal file
307
test_webhook_implementation.py
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
"""
|
||||||
|
Simple test script to validate webhook implementation without running full server.
|
||||||
|
|
||||||
|
This script tests:
|
||||||
|
1. Webhook module imports and syntax
|
||||||
|
2. WebhookDeliveryService initialization
|
||||||
|
3. Payload construction logic
|
||||||
|
4. Configuration parsing
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
# Add deploy/docker to path to import modules
|
||||||
|
# sys.path.insert(0, '/home/user/crawl4ai/deploy/docker')
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'deploy', 'docker'))
|
||||||
|
|
||||||
|
def test_imports():
    """Verify that the webhook service and its schema models can be imported.

    Returns:
        bool: True when both ``webhook`` and ``schemas`` import cleanly;
        False as soon as either import raises.
    """
    rule = "=" * 60
    print(rule)
    print("TEST 1: Module Imports")
    print(rule)

    # Delivery service first: nothing else is worth checking without it.
    try:
        from webhook import WebhookDeliveryService
        print("✅ webhook.WebhookDeliveryService imported successfully")
    except Exception as err:
        print(f"❌ Failed to import webhook module: {err}")
        return False

    # Then the two models imported from ``schemas``.
    try:
        from schemas import WebhookConfig, WebhookPayload
        for model_name in ("WebhookConfig", "WebhookPayload"):
            print("✅ schemas." + model_name + " imported successfully")
    except Exception as err:
        print(f"❌ Failed to import schemas: {err}")
        return False

    return True
|
||||||
|
|
||||||
|
def test_webhook_service_init():
    """Verify that ``WebhookDeliveryService`` converts its retry settings correctly.

    The service is built from a config whose retry timings are given in
    milliseconds; the check expects the service attributes to expose them in
    seconds.

    Returns:
        bool: True when the service initialises and every attribute has the
        expected value; False if anything raises (the traceback is printed).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 2: WebhookDeliveryService Initialization")
    print(rule)

    try:
        from webhook import WebhookDeliveryService

        # Test with default config
        retry_settings = {
            "max_attempts": 5,
            "initial_delay_ms": 1000,
            "max_delay_ms": 32000,
            "timeout_ms": 30000,
        }
        webhook_settings = {
            "enabled": True,
            "default_url": None,
            "data_in_payload": False,
            "retry": retry_settings,
            "headers": {"User-Agent": "Crawl4AI-Webhook/1.0"},
        }
        svc = WebhookDeliveryService({"webhooks": webhook_settings})

        print("✅ Service initialized successfully")
        print(f" - Max attempts: {svc.max_attempts}")
        print(f" - Initial delay: {svc.initial_delay}s")
        print(f" - Max delay: {svc.max_delay}s")
        print(f" - Timeout: {svc.timeout}s")

        # Verify calculations: (attribute, expected value, failure message)
        expectations = (
            ("max_attempts", 5, "Max attempts should be 5"),
            ("initial_delay", 1.0, "Initial delay should be 1.0s"),
            ("max_delay", 32.0, "Max delay should be 32.0s"),
            ("timeout", 30.0, "Timeout should be 30.0s"),
        )
        for attr, wanted, message in expectations:
            assert getattr(svc, attr) == wanted, message

        print("✅ All configuration values correct")
        return True
    except Exception as err:
        print(f"❌ Service initialization failed: {err}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_webhook_config_model():
    """Exercise the ``WebhookConfig`` model with valid, minimal and invalid input.

    Returns:
        bool: True when the full and minimal configs are accepted and the
        malformed URL raises ``ValidationError``; False when the malformed
        URL is accepted or anything else raises.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 3: WebhookConfig Model Validation")
    print(rule)

    try:
        from schemas import WebhookConfig
        from pydantic import ValidationError

        # Fully specified configuration.
        full = WebhookConfig(**{
            "webhook_url": "https://example.com/webhook",
            "webhook_data_in_payload": True,
            "webhook_headers": {"X-Secret": "token123"},
        })
        print("✅ Valid config accepted:")
        print(f" - URL: {full.webhook_url}")
        print(f" - Data in payload: {full.webhook_data_in_payload}")
        print(f" - Headers: {full.webhook_headers}")

        # URL only; the other two fields are printed as the model fills them in.
        minimal = WebhookConfig(**{"webhook_url": "https://example.com/webhook"})
        print("✅ Minimal config accepted (defaults applied):")
        print(f" - URL: {minimal.webhook_url}")
        print(f" - Data in payload: {minimal.webhook_data_in_payload}")
        print(f" - Headers: {minimal.webhook_headers}")

        # A string that is not a URL must be rejected by validation.
        try:
            WebhookConfig(**{"webhook_url": "not-a-url"})
        except ValidationError:
            print("✅ Invalid URL correctly rejected")
        else:
            print("❌ Invalid URL should have been rejected")
            return False

        return True
    except Exception as err:
        print(f"❌ Model validation test failed: {err}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_payload_construction():
    """Build and print three sample webhook payloads (basic, error, with data).

    The payloads are plain dictionaries assembled locally and dumped as
    indented JSON; no project code is called.

    Returns:
        bool: True when all three payloads were built and serialised;
        False if anything raises (the traceback is printed).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 4: Payload Construction")
    print(rule)

    def _build(task_id, status, **extra):
        # Key order matters for the printed JSON: common fields first, extras last.
        body = {
            "task_id": task_id,
            "task_type": "crawl",
            "status": status,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "urls": ["https://example.com"],
        }
        body.update(extra)
        return body

    try:
        # Simulate payload construction from notify_job_completion
        basic = _build("crawl_abc123", "completed")
        print("✅ Basic payload constructed:")
        print(json.dumps(basic, indent=2))

        # Test with error
        failed = _build("crawl_xyz789", "failed", error="Connection timeout")
        print("\n✅ Error payload constructed:")
        print(json.dumps(failed, indent=2))

        # Test with data
        with_data = _build(
            "crawl_def456",
            "completed",
            data={"results": [{"url": "https://example.com", "markdown": "# Example"}]},
        )
        print("\n✅ Data payload constructed:")
        print(json.dumps(with_data, indent=2))

        return True
    except Exception as err:
        print(f"❌ Payload construction failed: {err}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
def test_exponential_backoff():
    """Check the capped doubling sequence for five attempts.

    The delay for attempt ``i`` (0-based) is ``min(initial * 2**i, cap)``
    with a 1 s initial delay and a 32 s cap, computed locally.

    Returns:
        bool: True when the computed delays equal 1, 2, 4, 8, 16 seconds;
        False if the comparison fails or anything raises.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 5: Exponential Backoff Calculation")
    print(rule)

    try:
        first_delay = 1.0  # 1 second
        delay_cap = 32.0  # 32 seconds

        def _delay_for(step):
            return min(first_delay * (2 ** step), delay_cap)

        print("Backoff delays for 5 attempts:")
        for number, step in enumerate(range(5), start=1):
            print(f" Attempt {number}: {_delay_for(step)}s")

        # Verify the sequence: 1s, 2s, 4s, 8s, 16s
        expected = [1.0, 2.0, 4.0, 8.0, 16.0]
        actual = list(map(_delay_for, range(5)))

        assert actual == expected, f"Expected {expected}, got {actual}"
        print("✅ Exponential backoff sequence correct")

        return True
    except Exception as err:
        print(f"❌ Backoff calculation failed: {err}")
        return False
|
||||||
|
|
||||||
|
def test_api_integration():
    """Statically check that api.py imports, initialises and uses the webhook service.

    Reads ``deploy/docker/api.py`` (relative to this file) as text and looks
    for three snippets, in order: the import, the service construction, and
    a reference to ``notify_job_completion``.

    Returns:
        bool: True when all three snippets are present; False at the first
        missing snippet or when the file cannot be read.
    """
    print("\n" + "=" * 60)
    print("TEST 6: API Integration")
    print("=" * 60)

    try:
        # Check if api.py can import webhook module
        api_path = os.path.join(os.path.dirname(__file__), 'deploy', 'docker', 'api.py')
        # Decode explicitly; the platform default encoding is not always UTF-8.
        with open(api_path, 'r', encoding='utf-8') as f:
            api_content = f.read()

        # (snippet to look for, message when found, message when missing)
        checks = [
            ('from webhook import WebhookDeliveryService',
             "✅ api.py imports WebhookDeliveryService",
             "❌ api.py missing webhook import"),
            ('WebhookDeliveryService(config)',
             "✅ api.py initializes WebhookDeliveryService",
             "❌ api.py doesn't initialize WebhookDeliveryService"),
            ('notify_job_completion',
             "✅ api.py calls notify_job_completion",
             "❌ api.py doesn't call notify_job_completion"),
        ]
        for snippet, found_msg, missing_msg in checks:
            if snippet not in api_content:
                print(missing_msg)
                return False
            print(found_msg)

        return True
    except Exception as e:
        print(f"❌ API integration check failed: {e}")
        return False
|
||||||
|
|
||||||
|
def main():
    """Run every webhook implementation check in order and print a summary.

    Returns:
        int: 0 when every check passed, 1 otherwise (usable as an exit status).
    """
    rule = "=" * 60

    print("\n🧪 Webhook Implementation Validation Tests")
    print(rule)

    # (label shown in the summary, check to run), executed top to bottom.
    checks = (
        ("Module Imports", test_imports),
        ("Service Initialization", test_webhook_service_init),
        ("Config Model", test_webhook_config_model),
        ("Payload Construction", test_payload_construction),
        ("Exponential Backoff", test_exponential_backoff),
        ("API Integration", test_api_integration),
    )
    results = []
    for label, check in checks:
        results.append((label, check()))

    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)

    total = len(results)
    passed = len([outcome for _, outcome in results if outcome])

    for label, outcome in results:
        verdict = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{verdict} - {label}")

    print("\n" + rule)
    print(f"Results: {passed}/{total} tests passed")
    print(rule)

    if passed != total:
        print(f"\n⚠️ {total - passed} test(s) failed. Please review the output above.")
        return 1

    print("\n🎉 All tests passed! Webhook implementation is valid.")
    return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Propagate main()'s 0/1 result as the process exit status. sys.exit is
    # used because the bare exit() helper is only injected by the site module
    # and is not guaranteed to exist (e.g. under ``python -S``).
    sys.exit(main())
|
||||||
251
tests/WEBHOOK_TEST_README.md
Normal file
251
tests/WEBHOOK_TEST_README.md
Normal file
@@ -0,0 +1,251 @@
|
|||||||
|
# Webhook Feature Test Script
|
||||||
|
|
||||||
|
This directory contains a comprehensive test script for the webhook feature implementation.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The `test_webhook_feature.sh` script automates the entire process of testing the webhook feature:
|
||||||
|
|
||||||
|
1. ✅ Fetches and switches to the webhook feature branch
|
||||||
|
2. ✅ Activates the virtual environment
|
||||||
|
3. ✅ Installs all required dependencies
|
||||||
|
4. ✅ Starts Redis server in background
|
||||||
|
5. ✅ Starts Crawl4AI server in background
|
||||||
|
6. ✅ Runs webhook integration test
|
||||||
|
7. ✅ Verifies job completion via webhook
|
||||||
|
8. ✅ Cleans up and returns to original branch
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Python 3.10+
|
||||||
|
- Virtual environment already created (`venv/` in project root)
|
||||||
|
- Git repository with the webhook feature branch
|
||||||
|
- `redis-server` (script will attempt to install if missing)
|
||||||
|
- `curl` and `lsof` commands available
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Quick Start
|
||||||
|
|
||||||
|
From the project root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./tests/test_webhook_feature.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Or from the tests directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd tests
|
||||||
|
./test_webhook_feature.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### What the Script Does
|
||||||
|
|
||||||
|
#### Step 1: Branch Management
|
||||||
|
- Saves your current branch
|
||||||
|
- Fetches the webhook feature branch from remote
|
||||||
|
- Switches to the webhook feature branch
|
||||||
|
|
||||||
|
#### Step 2: Environment Setup
|
||||||
|
- Activates your existing virtual environment
|
||||||
|
- Installs dependencies from `deploy/docker/requirements.txt`
|
||||||
|
- Installs Flask for the webhook receiver
|
||||||
|
|
||||||
|
#### Step 3: Service Startup
|
||||||
|
- Starts Redis server on port 6379
|
||||||
|
- Starts Crawl4AI server on port 11235
|
||||||
|
- Waits for server health check to pass
|
||||||
|
|
||||||
|
#### Step 4: Webhook Test
|
||||||
|
- Creates a webhook receiver on port 8080
|
||||||
|
- Submits a crawl job for `https://example.com` with webhook config
|
||||||
|
- Waits for webhook notification (60s timeout)
|
||||||
|
- Verifies webhook payload contains expected data
|
||||||
|
|
||||||
|
#### Step 5: Cleanup
|
||||||
|
- Stops webhook receiver
|
||||||
|
- Stops Crawl4AI server
|
||||||
|
- Stops Redis server
|
||||||
|
- Returns to your original branch
|
||||||
|
|
||||||
|
## Expected Output
|
||||||
|
|
||||||
|
```
|
||||||
|
[INFO] Starting webhook feature test script
|
||||||
|
[INFO] Project root: /path/to/crawl4ai
|
||||||
|
[INFO] Step 1: Fetching PR branch...
|
||||||
|
[INFO] Current branch: develop
|
||||||
|
[SUCCESS] Branch fetched
|
||||||
|
[INFO] Step 2: Switching to branch: claude/implement-webhook-crawl-feature-011CULZY1Jy8N5MUkZqXkRVp
|
||||||
|
[SUCCESS] Switched to webhook feature branch
|
||||||
|
[INFO] Step 3: Activating virtual environment...
|
||||||
|
[SUCCESS] Virtual environment activated
|
||||||
|
[INFO] Step 4: Installing server dependencies...
|
||||||
|
[SUCCESS] Dependencies installed
|
||||||
|
[INFO] Step 5a: Starting Redis...
|
||||||
|
[SUCCESS] Redis started (PID: 12345)
|
||||||
|
[INFO] Step 5b: Starting server on port 11235...
|
||||||
|
[INFO] Server started (PID: 12346)
|
||||||
|
[INFO] Waiting for server to be ready...
|
||||||
|
[SUCCESS] Server is ready!
|
||||||
|
[INFO] Step 6: Creating webhook test script...
|
||||||
|
[INFO] Running webhook test...
|
||||||
|
|
||||||
|
🚀 Submitting crawl job with webhook...
|
||||||
|
✅ Job submitted successfully, task_id: crawl_abc123
|
||||||
|
⏳ Waiting for webhook notification...
|
||||||
|
|
||||||
|
✅ Webhook received: {
|
||||||
|
"task_id": "crawl_abc123",
|
||||||
|
"task_type": "crawl",
|
||||||
|
"status": "completed",
|
||||||
|
"timestamp": "2025-10-22T00:00:00.000000+00:00",
|
||||||
|
"urls": ["https://example.com"],
|
||||||
|
"data": { ... }
|
||||||
|
}
|
||||||
|
|
||||||
|
✅ Webhook received!
|
||||||
|
Task ID: crawl_abc123
|
||||||
|
Status: completed
|
||||||
|
URLs: ['https://example.com']
|
||||||
|
✅ Data included in webhook payload
|
||||||
|
📄 Crawled 1 URL(s)
|
||||||
|
- https://example.com: 1234 chars
|
||||||
|
|
||||||
|
🎉 Webhook test PASSED!
|
||||||
|
|
||||||
|
[INFO] Step 7: Verifying test results...
|
||||||
|
[SUCCESS] ✅ Webhook test PASSED!
|
||||||
|
[SUCCESS] All tests completed successfully! 🎉
|
||||||
|
[INFO] Cleanup will happen automatically...
|
||||||
|
[INFO] Starting cleanup...
|
||||||
|
[INFO] Stopping webhook receiver...
|
||||||
|
[INFO] Stopping server...
|
||||||
|
[INFO] Stopping Redis...
|
||||||
|
[INFO] Switching back to branch: develop
|
||||||
|
[SUCCESS] Cleanup complete
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Server Failed to Start
|
||||||
|
|
||||||
|
If the server fails to start, check the logs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
tail -100 /tmp/crawl4ai_server.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Common issues:
|
||||||
|
- Port 11235 already in use: `lsof -ti:11235 | xargs kill -9`
|
||||||
|
- Missing dependencies: Check that all packages are installed
|
||||||
|
|
||||||
|
### Redis Connection Failed
|
||||||
|
|
||||||
|
Check if Redis is running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
redis-cli ping
|
||||||
|
# Should return: PONG
|
||||||
|
```
|
||||||
|
|
||||||
|
If not running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
redis-server --port 6379 --daemonize yes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Webhook Not Received
|
||||||
|
|
||||||
|
The script has a 60-second timeout for webhook delivery. If the webhook isn't received:
|
||||||
|
|
||||||
|
1. Check server logs: `/tmp/crawl4ai_server.log`
|
||||||
|
2. Verify webhook receiver is running on port 8080
|
||||||
|
3. Check network connectivity between components
|
||||||
|
|
||||||
|
### Script Interruption
|
||||||
|
|
||||||
|
If the script is interrupted (Ctrl+C), cleanup happens automatically via trap. The script will:
|
||||||
|
- Kill all background processes
|
||||||
|
- Stop Redis
|
||||||
|
- Return to your original branch
|
||||||
|
|
||||||
|
To clean up manually if needed:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Kill processes by port
|
||||||
|
lsof -ti:11235 | xargs kill -9 # Server
|
||||||
|
lsof -ti:8080 | xargs kill -9 # Webhook receiver
|
||||||
|
lsof -ti:6379 | xargs kill -9 # Redis
|
||||||
|
|
||||||
|
# Return to your branch
|
||||||
|
git checkout develop # or your branch name
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing Different URLs
|
||||||
|
|
||||||
|
To test with a different URL, modify the script or create a custom test:
|
||||||
|
|
||||||
|
```python
|
||||||
|
payload = {
|
||||||
|
"urls": ["https://your-url-here.com"],
|
||||||
|
"browser_config": {"headless": True},
|
||||||
|
"crawler_config": {"cache_mode": "bypass"},
|
||||||
|
"webhook_config": {
|
||||||
|
"webhook_url": "http://localhost:8080/webhook",
|
||||||
|
"webhook_data_in_payload": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Files Generated
|
||||||
|
|
||||||
|
The script creates temporary files:
|
||||||
|
|
||||||
|
- `/tmp/crawl4ai_server.log` - Server output logs
|
||||||
|
- `/tmp/test_webhook.py` - Webhook test Python script
|
||||||
|
|
||||||
|
These are not cleaned up automatically so you can review them after the test.
|
||||||
|
|
||||||
|
## Exit Codes
|
||||||
|
|
||||||
|
- `0` - All tests passed successfully
|
||||||
|
- `1` - Test failed (check output for details)
|
||||||
|
|
||||||
|
## Safety Features
|
||||||
|
|
||||||
|
- ✅ Automatic cleanup on exit, interrupt, or error
|
||||||
|
- ✅ Returns to original branch on completion
|
||||||
|
- ✅ Kills all background processes
|
||||||
|
- ✅ Comprehensive error handling
|
||||||
|
- ✅ Colored output for easy reading
|
||||||
|
- ✅ Detailed logging at each step
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- The script uses `set -e` to exit on any command failure
|
||||||
|
- All background processes are tracked and cleaned up
|
||||||
|
- The virtual environment must exist before running
|
||||||
|
- Redis must be available (installed or installable via apt-get/brew)
|
||||||
|
|
||||||
|
## Integration with CI/CD
|
||||||
|
|
||||||
|
This script can be integrated into CI/CD pipelines:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Example GitHub Actions
|
||||||
|
- name: Test Webhook Feature
|
||||||
|
run: |
|
||||||
|
chmod +x tests/test_webhook_feature.sh
|
||||||
|
./tests/test_webhook_feature.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
If you encounter issues:
|
||||||
|
|
||||||
|
1. Check the troubleshooting section above
|
||||||
|
2. Review server logs at `/tmp/crawl4ai_server.log`
|
||||||
|
3. Ensure all prerequisites are met
|
||||||
|
4. Open an issue with the full output of the script
|
||||||
372
tests/docker/test_hooks_client.py
Normal file
372
tests/docker/test_hooks_client.py
Normal file
@@ -0,0 +1,372 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test client for demonstrating user-provided hooks in Crawl4AI Docker API
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
API_BASE_URL = "http://localhost:11234" # Adjust if needed
|
||||||
|
|
||||||
|
|
||||||
|
def test_hooks_info():
    """Query GET /hooks/info and print every advertised hook point.

    On a 200 response, prints each hook's parameter list and description taken
    from the ``available_hooks`` mapping of the JSON body. On any other status,
    prints the status code and the raw response text.

    Raises:
        requests.exceptions.RequestException: if the server cannot be reached
            or does not answer within the timeout.
    """
    print("=" * 70)
    print("Testing: GET /hooks/info")
    print("=" * 70)

    # A timeout keeps the script from blocking forever when the server accepts
    # the connection but never answers; this is a cheap metadata endpoint.
    response = requests.get(f"{API_BASE_URL}/hooks/info", timeout=10)
    if response.status_code == 200:
        data = response.json()
        print("Available Hook Points:")
        for hook, info in data['available_hooks'].items():
            print(f"\n{hook}:")
            print(f" Parameters: {', '.join(info['parameters'])}")
            print(f" Description: {info['description']}")
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def test_basic_crawl_with_hooks():
    """POST a crawl request carrying three user hooks and print the outcome.

    The hooks are sent as Python source strings keyed by hook point
    (``on_page_context_created``, ``before_retrieve_html``, ``before_goto``).
    On a 200 response the function prints the hook status, any validation
    errors, the execution summary/log, runtime hook errors and the per-URL
    crawl result. On any other status it prints the status code and body.
    """
    print("\n" + "=" * 70)
    print("Testing: POST /crawl with hooks")
    print("=" * 70)

    # Define hooks as Python code strings
    hooks_code = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print("Hook: Setting up page context")
    # Block images to speed up crawling
    await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
    print("Hook: Images blocked")
    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    print("Hook: Before retrieving HTML")
    # Scroll to bottom to load lazy content
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(1000)
    print("Hook: Scrolled to bottom")
    return page
""",

        "before_goto": """
async def hook(page, context, url, **kwargs):
    print(f"Hook: About to navigate to {url}")
    # Add custom headers
    await page.set_extra_http_headers({
        'X-Test-Header': 'crawl4ai-hooks-test'
    })
    return page
"""
    }

    # Create request payload
    payload = {
        "urls": ["https://httpbin.org/html"],
        "hooks": {
            "code": hooks_code,
            "timeout": 30
        }
    }

    print("Sending request with hooks...")
    response = requests.post(f"{API_BASE_URL}/crawl", json=payload)

    if response.status_code == 200:
        data = response.json()
        print("\n✅ Crawl successful!")

        # Check hooks status
        if 'hooks' in data:
            hooks_info = data['hooks']
            print("\nHooks Execution Summary:")
            print(f" Status: {hooks_info['status']['status']}")
            print(f" Attached hooks: {', '.join(hooks_info['status']['attached_hooks'])}")

            # The report sections below are read with .get() so that a
            # response omitting one of them is reported as "nothing to show"
            # instead of aborting the whole printout with a KeyError.
            validation_errors = hooks_info['status'].get('validation_errors')
            if validation_errors:
                print("\n⚠️ Validation Errors:")
                for error in validation_errors:
                    print(f" - {error['hook_point']}: {error['error']}")

            if 'summary' in hooks_info:
                summary = hooks_info['summary']
                print(f"\nExecution Statistics:")
                print(f" Total executions: {summary['total_executions']}")
                print(f" Successful: {summary['successful']}")
                print(f" Failed: {summary['failed']}")
                print(f" Timed out: {summary['timed_out']}")
                print(f" Success rate: {summary['success_rate']:.1f}%")

            execution_log = hooks_info.get('execution_log')
            if execution_log:
                print("\nExecution Log:")
                for log_entry in execution_log:
                    status_icon = "✅" if log_entry['status'] == 'success' else "❌"
                    print(f" {status_icon} {log_entry['hook_point']}: {log_entry['status']} ({log_entry.get('execution_time', 0):.2f}s)")

            hook_errors = hooks_info.get('errors')
            if hook_errors:
                print("\n❌ Hook Errors:")
                for error in hook_errors:
                    print(f" - {error['hook_point']}: {error['error']}")

        # Show crawl results
        if 'results' in data:
            print(f"\nCrawled {len(data['results'])} URL(s)")
            for result in data['results']:
                print(f" - {result['url']}: {'✅' if result['success'] else '❌'}")

    else:
        print(f"❌ Error: {response.status_code}")
        print(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_hook():
    """POST a crawl request with deliberately broken hooks and print the report.

    Two faulty hooks are submitted: a non-async function for
    ``on_page_context_created`` and an async hook for ``before_retrieve_html``
    that calls a method that does not exist. On a 200 response the function
    prints the hook status plus any validation and runtime errors, and notes
    whether the response still reports overall success. On any other status it
    prints the status code and body.
    """
    print("\n" + "=" * 70)
    print("Testing: Invalid hook handling")
    print("=" * 70)

    # Intentionally broken hook
    hooks_code = {
        "on_page_context_created": """
def hook(page, context): # Missing async!
    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    # This will cause an error
    await page.non_existent_method()
    return page
"""
    }

    payload = {
        "urls": ["https://httpbin.org/html"],
        "hooks": {
            "code": hooks_code,
            "timeout": 5
        }
    }

    print("Sending request with invalid hooks...")
    response = requests.post(f"{API_BASE_URL}/crawl", json=payload)

    if response.status_code == 200:
        data = response.json()

        if 'hooks' in data:
            hooks_info = data['hooks']
            print(f"\nHooks Status: {hooks_info['status']['status']}")

            # Both error lists are read with .get(): a response that omits one
            # of them should simply print nothing for that section rather than
            # crash this error-handling demo with a KeyError.
            validation_errors = hooks_info['status'].get('validation_errors')
            if validation_errors:
                print("\n✅ Validation caught errors (as expected):")
                for error in validation_errors:
                    print(f" - {error['hook_point']}: {error['error']}")

            runtime_errors = hooks_info.get('errors')
            if runtime_errors:
                print("\n✅ Runtime errors handled gracefully:")
                for error in runtime_errors:
                    print(f" - {error['hook_point']}: {error['error']}")

        # The crawl should still succeed despite hook errors
        if data.get('success'):
            print("\n✅ Crawl succeeded despite hook errors (error isolation working!)")

    else:
        print(f"Error: {response.status_code}")
        print(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def test_authentication_hook():
    """POST a crawl of httpbin's basic-auth endpoint using authentication hooks.

    A ``before_goto`` hook sets a Basic ``Authorization`` header and an
    ``on_page_context_created`` hook adds a session cookie. On a 200 response
    with ``success`` set, the function prints how many hooks succeeded, any
    hook errors, and whether the first crawl result succeeded and mentions
    ``authenticated`` in its HTML. Otherwise it prints the failure details.
    """
    print("\n" + "=" * 70)
    print("Testing: Authentication with hooks")
    print("=" * 70)

    hooks_code = {
        "before_goto": """
async def hook(page, context, url, **kwargs):
    # For httpbin.org basic auth test, set Authorization header
    import base64

    # httpbin.org/basic-auth/user/passwd expects username="user" and password="passwd"
    credentials = base64.b64encode(b"user:passwd").decode('ascii')

    await page.set_extra_http_headers({
        'Authorization': f'Basic {credentials}'
    })

    print(f"Hook: Set Authorization header for {url}")
    return page
""",
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    # Example: Add cookies for session tracking
    await context.add_cookies([
        {
            'name': 'session_id',
            'value': 'test_session_123',
            'domain': '.httpbin.org',
            'path': '/',
            'httpOnly': True,
            'secure': True
        }
    ])

    print("Hook: Added session cookie")
    return page
"""
    }

    payload = {
        "urls": ["https://httpbin.org/basic-auth/user/passwd"],
        "hooks": {
            "code": hooks_code,
            "timeout": 30
        }
    }

    print("Sending request with authentication hook...")
    response = requests.post(f"{API_BASE_URL}/crawl", json=payload)

    if response.status_code == 200:
        data = response.json()
        if data.get('success'):
            print("✅ Crawl with authentication hook successful")

            # Check if hooks executed
            if 'hooks' in data:
                hooks_info = data['hooks']
                if hooks_info.get('summary', {}).get('successful', 0) > 0:
                    print(f"✅ Authentication hooks executed: {hooks_info['summary']['successful']} successful")

                # Check for any hook errors
                if hooks_info.get('errors'):
                    print("⚠️ Hook errors:")
                    for error in hooks_info['errors']:
                        print(f" - {error}")

            # Check if authentication worked by looking at the result
            if 'results' in data and len(data['results']) > 0:
                result = data['results'][0]
                if result.get('success'):
                    print("✅ Page crawled successfully (authentication worked!)")
                    # httpbin.org/basic-auth returns JSON with authenticated=true when successful
                    if 'authenticated' in str(result.get('html', '')):
                        print("✅ Authentication confirmed in response content")
                else:
                    print(f"❌ Crawl failed: {result.get('error_message', 'Unknown error')}")
        else:
            print("❌ Request failed")
            print(f"Response: {json.dumps(data, indent=2)}")
    else:
        print(f"❌ Error: {response.status_code}")
        try:
            error_data = response.json()
            print(f"Error details: {json.dumps(error_data, indent=2)}")
        except ValueError:
            # The error body was not valid JSON; fall back to a text excerpt.
            # Only the decode failure is caught so that Ctrl+C and other
            # unrelated exceptions are no longer silently swallowed.
            print(f"Error text: {response.text[:500]}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_streaming_with_hooks():
    """Stream a two-URL crawl from POST /crawl/stream with one hook attached.

    Prints the ``X-Hooks-Status`` response header when present, then one line
    per non-empty streamed chunk: the URL or status of each JSON message, or
    the raw text when a chunk is not valid JSON. A non-200 response only
    prints its status code.
    """
    separator = "=" * 70
    print("\n" + separator)
    print("Testing: POST /crawl/stream with hooks")
    print(separator)

    # Hook source sent to the server: strips <img> elements before HTML retrieval.
    strip_images_hook = """
async def hook(page, context, **kwargs):
    await page.evaluate("document.querySelectorAll('img').forEach(img => img.remove())")
    return page
"""

    payload = {
        "urls": ["https://httpbin.org/html", "https://httpbin.org/json"],
        "hooks": {
            "code": {"before_retrieve_html": strip_images_hook},
            "timeout": 10,
        },
    }

    print("Sending streaming request with hooks...")

    stream_url = f"{API_BASE_URL}/crawl/stream"
    with requests.post(stream_url, json=payload, stream=True) as response:
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return

        # Check headers for hooks status
        header_status = response.headers.get('X-Hooks-Status')
        if header_status:
            print(f"Hooks Status (from header): {header_status}")

        print("\nStreaming results:")
        for chunk in response.iter_lines():
            if not chunk:
                continue
            try:
                message = json.loads(chunk)
            except json.JSONDecodeError:
                print(f" Raw: {chunk.decode()}")
                continue

            if 'url' in message:
                print(f" Received: {message['url']}")
            elif 'status' in message:
                print(f" Stream status: {message['status']}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_basic_without_hooks():
    """POST a plain two-URL crawl (no hooks) and dump the JSON response.

    Prints the pretty-printed response body on a 200 status, otherwise only
    the status code.
    """
    separator = "=" * 70
    print("\n" + separator)
    print("Testing: POST /crawl with no hooks")
    print(separator)

    target_urls = ["https://httpbin.org/html", "https://httpbin.org/json"]
    response = requests.post(f"{API_BASE_URL}/crawl", json={"urls": target_urls})

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    body = response.json()
    print(f"Response: {json.dumps(body, indent=2)}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the enabled hook scenarios between a header and footer banner.

    Only the invalid-hook scenario is currently active; the other scenarios
    are kept as commented-out calls so they can be switched on individually.
    """
    separator = "=" * 70

    print("🔧 Crawl4AI Docker API - Hooks Testing")
    print(separator)

    # Scenario 1: hooks information
    # test_hooks_info()

    # Scenario 2: basic crawl with hooks
    # test_basic_crawl_with_hooks()

    # Scenario 3: invalid hooks (error handling)
    test_invalid_hook()

    # Scenario 4: authentication hook
    # test_authentication_hook()

    # Scenario 5: streaming with hooks
    # test_streaming_with_hooks()

    # Scenario 6: basic crawl without hooks
    # test_basic_without_hooks()

    print("\n" + separator)
    print("✅ All tests completed!")
    print(separator)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the scenarios only when this file is executed as a script, so importing
# the module does not trigger any HTTP requests.
if __name__ == "__main__":
    main()
|
||||||
512
tests/docker/test_hooks_comprehensive.py
Normal file
512
tests/docker/test_hooks_comprehensive.py
Normal file
@@ -0,0 +1,512 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Comprehensive test demonstrating all hook types from hooks_example.py
|
||||||
|
adapted for the Docker API with real URLs
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
API_BASE_URL = "http://localhost:11234"
|
||||||
|
|
||||||
|
|
||||||
|
def test_all_hooks_demo():
    """POST one crawl request that attaches a hook to each of eight hook points.

    The hooks are sent as Python source strings keyed by hook point. The
    request also passes a ``crawler_config`` with a scroll snippet, a
    ``wait_for`` selector and cache bypass. The elapsed request time is
    printed; on a 200 response the hook status, attached hooks, execution
    summary/log and per-URL results are printed, otherwise the error details.
    """
    print("=" * 70)
    print("Testing: All Hooks Comprehensive Demo")
    print("=" * 70)

    hooks_code = {
        "on_browser_created": """
async def hook(browser, **kwargs):
    # Hook called after browser is created
    print("[HOOK] on_browser_created - Browser is ready!")
    # Browser-level configurations would go here
    return browser
""",

        "on_page_context_created": """
async def hook(page, context, **kwargs):
    # Hook called after a new page and context are created
    print("[HOOK] on_page_context_created - New page created!")

    # Set viewport size for consistent rendering
    await page.set_viewport_size({"width": 1920, "height": 1080})

    # Add cookies for the session (using httpbin.org domain)
    await context.add_cookies([
        {
            "name": "test_session",
            "value": "abc123xyz",
            "domain": ".httpbin.org",
            "path": "/",
            "httpOnly": True,
            "secure": True
        }
    ])

    # Block ads and tracking scripts to speed up crawling
    await context.route("**/*.{png,jpg,jpeg,gif,webp,svg}", lambda route: route.abort())
    await context.route("**/analytics/*", lambda route: route.abort())
    await context.route("**/ads/*", lambda route: route.abort())

    print("[HOOK] Viewport set, cookies added, and ads blocked")
    return page
""",

        "on_user_agent_updated": """
async def hook(page, context, user_agent, **kwargs):
    # Hook called when user agent is updated
    print(f"[HOOK] on_user_agent_updated - User agent: {user_agent[:50]}...")
    return page
""",

        "before_goto": """
async def hook(page, context, url, **kwargs):
    # Hook called before navigating to each URL
    print(f"[HOOK] before_goto - About to visit: {url}")

    # Add custom headers for the request
    await page.set_extra_http_headers({
        "X-Custom-Header": "crawl4ai-test",
        "Accept-Language": "en-US,en;q=0.9",
        "DNT": "1"
    })

    return page
""",

        "after_goto": """
async def hook(page, context, url, response, **kwargs):
    # Hook called after navigating to each URL
    print(f"[HOOK] after_goto - Successfully loaded: {url}")

    # Wait a moment for dynamic content to load
    await page.wait_for_timeout(1000)

    # Check if specific elements exist (with error handling)
    try:
        # For httpbin.org, wait for body element
        await page.wait_for_selector("body", timeout=2000)
        print("[HOOK] Body element found and loaded")
    except:
        print("[HOOK] Timeout waiting for body, continuing anyway")

    return page
""",

        "on_execution_started": """
async def hook(page, context, **kwargs):
    # Hook called after custom JavaScript execution
    print("[HOOK] on_execution_started - Custom JS executed!")

    # You could inject additional JavaScript here if needed
    await page.evaluate("console.log('[INJECTED] Hook JS running');")

    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    # Hook called before retrieving the HTML content
    print("[HOOK] before_retrieve_html - Preparing to get HTML")

    # Scroll to bottom to trigger lazy loading
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
    await page.wait_for_timeout(500)

    # Scroll back to top
    await page.evaluate("window.scrollTo(0, 0);")
    await page.wait_for_timeout(500)

    # One more scroll to middle for good measure
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight / 2);")

    print("[HOOK] Scrolling completed for lazy-loaded content")
    return page
""",

        "before_return_html": """
async def hook(page, context, html, **kwargs):
    # Hook called before returning the HTML content
    print(f"[HOOK] before_return_html - HTML length: {len(html)} characters")

    # Log some page metrics
    metrics = await page.evaluate('''() => {
        return {
            images: document.images.length,
            links: document.links.length,
            scripts: document.scripts.length
        }
    }''')

    print(f"[HOOK] Page metrics - Images: {metrics['images']}, Links: {metrics['links']}, Scripts: {metrics['scripts']}")

    return page
"""
    }

    # Create request payload
    payload = {
        "urls": ["https://httpbin.org/html"],
        "hooks": {
            "code": hooks_code,
            "timeout": 30
        },
        "crawler_config": {
            "js_code": "window.scrollTo(0, document.body.scrollHeight);",
            "wait_for": "body",
            "cache_mode": "bypass"
        }
    }

    print("\nSending request with all 8 hooks...")
    start_time = time.time()

    response = requests.post(f"{API_BASE_URL}/crawl", json=payload)

    elapsed_time = time.time() - start_time
    print(f"Request completed in {elapsed_time:.2f} seconds")

    if response.status_code == 200:
        data = response.json()
        print("\n✅ Request successful!")

        # Check hooks execution
        if 'hooks' in data:
            hooks_info = data['hooks']
            print("\n📊 Hooks Execution Summary:")
            print(f" Status: {hooks_info['status']['status']}")
            print(f" Attached hooks: {len(hooks_info['status']['attached_hooks'])}")

            for hook_name in hooks_info['status']['attached_hooks']:
                print(f" ✓ {hook_name}")

            if 'summary' in hooks_info:
                summary = hooks_info['summary']
                print(f"\n📈 Execution Statistics:")
                print(f" Total executions: {summary['total_executions']}")
                print(f" Successful: {summary['successful']}")
                print(f" Failed: {summary['failed']}")
                print(f" Timed out: {summary['timed_out']}")
                print(f" Success rate: {summary['success_rate']:.1f}%")

            if hooks_info.get('execution_log'):
                print(f"\n📝 Execution Log:")
                for log_entry in hooks_info['execution_log']:
                    status_icon = "✅" if log_entry['status'] == 'success' else "❌"
                    exec_time = log_entry.get('execution_time', 0)
                    print(f" {status_icon} {log_entry['hook_point']}: {exec_time:.3f}s")

        # Check crawl results
        if 'results' in data and len(data['results']) > 0:
            print(f"\n📄 Crawl Results:")
            for result in data['results']:
                print(f" URL: {result['url']}")
                print(f" Success: {result.get('success', False)}")
                if result.get('html'):
                    print(f" HTML length: {len(result['html'])} characters")

    else:
        print(f"❌ Error: {response.status_code}")
        try:
            error_data = response.json()
            print(f"Error details: {json.dumps(error_data, indent=2)}")
        except ValueError:
            # The error body was not valid JSON; fall back to a text excerpt.
            # Only the decode failure is caught so that Ctrl+C and other
            # unrelated exceptions are no longer silently swallowed.
            print(f"Error text: {response.text[:500]}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_authentication_flow():
    """Exercise an authentication flow driven by two server-side hooks.

    Posts a crawl request for httpbin's basic-auth endpoint to
    ``{API_BASE_URL}/crawl``. The ``on_page_context_created`` hook installs an
    auth cookie and two localStorage entries; the ``before_goto`` hook adds a
    Basic ``Authorization`` header plus an ``X-API-Key`` header. The outcome is
    reported on stdout only; nothing is returned or asserted.

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached or
            does not answer within the request timeout.
    """
    import re  # function-scope import: only the auth check below needs it

    print("\n" + "=" * 70)
    print("Testing: Authentication Flow with Multiple Hooks")
    print("=" * 70)

    hooks_code = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print("[HOOK] Setting up authentication context")

    # Add authentication cookies
    await context.add_cookies([
        {
            "name": "auth_token",
            "value": "fake_jwt_token_here",
            "domain": ".httpbin.org",
            "path": "/",
            "httpOnly": True,
            "secure": True
        }
    ])

    # Set localStorage items (for SPA authentication)
    await page.evaluate('''
        localStorage.setItem('user_id', '12345');
        localStorage.setItem('auth_time', new Date().toISOString());
    ''')

    return page
""",

        "before_goto": """
async def hook(page, context, url, **kwargs):
    print(f"[HOOK] Adding auth headers for {url}")

    # Add Authorization header
    import base64
    credentials = base64.b64encode(b"user:passwd").decode('ascii')

    await page.set_extra_http_headers({
        'Authorization': f'Basic {credentials}',
        'X-API-Key': 'test-api-key-123'
    })

    return page
"""
    }

    payload = {
        "urls": [
            "https://httpbin.org/basic-auth/user/passwd"
        ],
        "hooks": {
            "code": hooks_code,
            "timeout": 15
        }
    }

    print("\nTesting authentication with httpbin endpoints...")
    # Without a timeout, requests waits forever if the server stalls.
    response = requests.post(f"{API_BASE_URL}/crawl", json=payload, timeout=120)

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code}")
        # Show the body as well; the status code alone rarely explains a failure.
        print(f"Error text: {response.text[:500]}")
        return

    data = response.json()
    print("✅ Authentication test completed")

    for i, result in enumerate(data.get('results', [])):
        print(f"\n URL {i+1}: {result['url']}")
        if not result.get('success'):
            print(f" ❌ Failed: {result.get('error_message', 'Unknown error')}")
            continue

        # `or ''` also covers an explicit `"html": None` in the result.
        html_content = result.get('html') or ''
        # Require the JSON pair itself, not merely the word "true" somewhere
        # on the page.
        if re.search(r'"authenticated"\s*:\s*true', html_content):
            print(" ✅ Authentication successful! Basic auth worked.")
        else:
            print(" ⚠️ Page loaded but auth status unclear")
|
||||||
|
|
||||||
|
|
||||||
|
def test_performance_optimization_hooks():
    """Exercise hooks that trim page weight before and after loading.

    Posts a crawl request for ``https://httpbin.org/html`` to
    ``{API_BASE_URL}/crawl`` with two hooks: ``on_page_context_created``
    aborts image/font/media/tracker requests and injects CSS that zeroes
    animation and transition times; ``before_retrieve_html`` removes ad-like
    elements plus all ``script`` and ``style`` tags. The request duration and
    resulting HTML size are printed; nothing is returned or asserted.

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached or
            does not answer within the request timeout.
    """
    print("\n" + "=" * 70)
    print("Testing: Performance Optimization Hooks")
    print("=" * 70)

    hooks_code = {
        "on_page_context_created": """
async def hook(page, context, **kwargs):
    print("[HOOK] Optimizing page for performance")

    # Block resource-heavy content
    await context.route("**/*.{png,jpg,jpeg,gif,webp,svg,ico}", lambda route: route.abort())
    await context.route("**/*.{woff,woff2,ttf,otf}", lambda route: route.abort())
    await context.route("**/*.{mp4,webm,ogg,mp3,wav}", lambda route: route.abort())
    await context.route("**/googletagmanager.com/*", lambda route: route.abort())
    await context.route("**/google-analytics.com/*", lambda route: route.abort())
    await context.route("**/doubleclick.net/*", lambda route: route.abort())
    await context.route("**/facebook.com/*", lambda route: route.abort())

    # Disable animations and transitions
    await page.add_style_tag(content='''
        *, *::before, *::after {
            animation-duration: 0s !important;
            animation-delay: 0s !important;
            transition-duration: 0s !important;
            transition-delay: 0s !important;
        }
    ''')

    print("[HOOK] Performance optimizations applied")
    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    print("[HOOK] Removing unnecessary elements before extraction")

    # Remove ads, popups, and other unnecessary elements
    await page.evaluate('''() => {
        // Remove common ad containers
        const adSelectors = [
            '.ad', '.ads', '.advertisement', '[id*="ad-"]', '[class*="ad-"]',
            '.popup', '.modal', '.overlay', '.cookie-banner', '.newsletter-signup'
        ];

        adSelectors.forEach(selector => {
            document.querySelectorAll(selector).forEach(el => el.remove());
        });

        // Remove script tags to clean up HTML
        document.querySelectorAll('script').forEach(el => el.remove());

        // Remove style tags we don't need
        document.querySelectorAll('style').forEach(el => el.remove());
    }''')

    return page
"""
    }

    payload = {
        "urls": ["https://httpbin.org/html"],
        "hooks": {
            "code": hooks_code,
            "timeout": 10
        }
    }

    print("\nTesting performance optimization hooks...")
    start_time = time.time()

    # Without a timeout, requests waits forever if the server stalls.
    response = requests.post(f"{API_BASE_URL}/crawl", json=payload, timeout=120)

    elapsed_time = time.time() - start_time
    print(f"Request completed in {elapsed_time:.2f} seconds")

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code}")
        # Show the body as well; the status code alone rarely explains a failure.
        print(f"Error text: {response.text[:500]}")
        return

    data = response.json()
    print("✅ Performance optimization test completed")

    results = data.get('results')
    if results:
        # Only one URL was submitted, so only the first result is inspected.
        result = results[0]
        if result.get('html'):
            print(f" HTML size: {len(result['html'])} characters")
            print(" Resources blocked, ads removed, animations disabled")
|
||||||
|
|
||||||
|
|
||||||
|
def test_content_extraction_hooks():
    """Exercise hooks that wait for, expand and inspect page content.

    Posts a crawl request for two httpbin URLs to ``{API_BASE_URL}/crawl``.
    The ``after_goto`` hook waits two seconds and clicks a "Load More"-style
    button when one is found; the ``before_retrieve_html`` hook collects page
    metadata, prints it, and scrolls to the bottom three times. The hook
    summary and per-URL success flags are printed; nothing is returned or
    asserted.

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached or
            does not answer within the request timeout.
    """
    print("\n" + "=" * 70)
    print("Testing: Content Extraction Hooks")
    print("=" * 70)

    hooks_code = {
        "after_goto": """
async def hook(page, context, url, response, **kwargs):
    print(f"[HOOK] Waiting for dynamic content on {url}")

    # Wait for any lazy-loaded content
    await page.wait_for_timeout(2000)

    # Trigger any "Load More" buttons
    try:
        load_more = await page.query_selector('[class*="load-more"], [class*="show-more"], button:has-text("Load More")')
        if load_more:
            await load_more.click()
            await page.wait_for_timeout(1000)
            print("[HOOK] Clicked 'Load More' button")
    except:
        pass

    return page
""",

        "before_retrieve_html": """
async def hook(page, context, **kwargs):
    print("[HOOK] Extracting structured data")

    # Extract metadata
    metadata = await page.evaluate('''() => {
        const getMeta = (name) => {
            const element = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
            return element ? element.getAttribute('content') : null;
        };

        return {
            title: document.title,
            description: getMeta('description') || getMeta('og:description'),
            author: getMeta('author'),
            keywords: getMeta('keywords'),
            ogTitle: getMeta('og:title'),
            ogImage: getMeta('og:image'),
            canonical: document.querySelector('link[rel="canonical"]')?.href,
            jsonLd: Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
                .map(el => el.textContent).filter(Boolean)
        };
    }''')

    print(f"[HOOK] Extracted metadata: {json.dumps(metadata, indent=2)}")

    # Infinite scroll handling
    for i in range(3):
        await page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
        await page.wait_for_timeout(1000)
        print(f"[HOOK] Scroll iteration {i+1}/3")

    return page
"""
    }

    payload = {
        "urls": ["https://httpbin.org/html", "https://httpbin.org/json"],
        "hooks": {
            "code": hooks_code,
            "timeout": 20
        }
    }

    print("\nTesting content extraction hooks...")
    # Without a timeout, requests waits forever if the server stalls.
    response = requests.post(f"{API_BASE_URL}/crawl", json=payload, timeout=120)

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code}")
        # Show the body as well; the status code alone rarely explains a failure.
        print(f"Error text: {response.text[:500]}")
        return

    data = response.json()
    print("✅ Content extraction test completed")

    if 'hooks' in data and 'summary' in data['hooks']:
        summary = data['hooks']['summary']
        print(f" Hooks executed: {summary['successful']}/{summary['total_executions']}")

    for result in data.get('results', []):
        print(f"\n URL: {result['url']}")
        print(f" Success: {result.get('success', False)}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run every comprehensive hook test in sequence.

    Each test is invoked inside its own ``try`` block, so a failure is
    reported with a traceback and the remaining tests still run.
    """
    import traceback

    banner = "=" * 70
    print("🔧 Crawl4AI Docker API - Comprehensive Hooks Testing")
    print("Based on docs/examples/hooks_example.py")
    print(banner)

    suite = (
        ("All Hooks Demo", test_all_hooks_demo),
        ("Authentication Flow", test_authentication_flow),
        ("Performance Optimization", test_performance_optimization_hooks),
        ("Content Extraction", test_content_extraction_hooks),
    )
    total = len(suite)

    for position, (label, runner) in enumerate(suite, start=1):
        print(f"\n📌 Test {position}/{total}: {label}")
        try:
            runner()
            print(f"✅ {label} completed")
        except Exception as exc:
            print(f"❌ {label} failed: {exc}")
            traceback.print_exc()

    print("\n" + banner)
    print("🎉 All comprehensive hook tests completed!")
    print(banner)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
193
tests/docker/test_hooks_utility.py
Normal file
193
tests/docker/test_hooks_utility.py
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
"""
|
||||||
|
Test script demonstrating the hooks_to_string utility and Docker client integration.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
from crawl4ai import Crawl4aiDockerClient, hooks_to_string
|
||||||
|
|
||||||
|
|
||||||
|
# Define hook functions as regular Python functions
|
||||||
|
async def auth_hook(page, context, **kwargs):
    """Install a single test cookie on the browser context.

    The cookie ``test_cookie=test_value`` is scoped to ``.httpbin.org`` with
    path ``/``. Returns the ``page`` it was given.
    """
    test_cookie = {
        'name': 'test_cookie',
        'value': 'test_value',
        'domain': '.httpbin.org',
        'path': '/',
    }
    await context.add_cookies([test_cookie])
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def scroll_hook(page, context, **kwargs):
    """Scroll to the bottom of the page, wait one second, return the page."""
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    settle_ms = 1000
    await page.evaluate(scroll_to_bottom)
    await page.wait_for_timeout(settle_ms)
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def viewport_hook(page, context, **kwargs):
    """Resize the page viewport to 1920x1080 and return the page."""
    full_hd = {"width": 1920, "height": 1080}
    await page.set_viewport_size(full_hd)
    return page
|
||||||
|
|
||||||
|
|
||||||
|
async def test_hooks_utility():
    """Convert two hook functions with ``hooks_to_string`` and show the result.

    Prints the converted hook names and the first 200 characters of the
    ``on_page_context_created`` entry, then returns the converted mapping.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(heavy_rule)
    print("Testing hooks_to_string utility")
    print(heavy_rule)

    # Hook points mapped to plain function objects (not source strings).
    hooks_by_point = {
        "on_page_context_created": auth_hook,
        "before_retrieve_html": scroll_hook,
    }

    converted = hooks_to_string(hooks_by_point)

    print("\n✓ Successfully converted function objects to strings")
    print(f"\n✓ Converted {len(converted)} hooks:")
    for hook_point in converted:
        print(f" - {hook_point}")

    preview = converted["on_page_context_created"][:200]
    print("\n✓ Preview of converted hook:")
    print(light_rule)
    print(preview + "...")
    print(light_rule)

    return converted
|
||||||
|
|
||||||
|
|
||||||
|
async def test_docker_client_with_functions():
    """Crawl through the Docker client, passing hook functions directly.

    Connects to ``http://localhost:11234``, so a Crawl4AI Docker server must
    be listening there. Prints the crawl's success flag and URL.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Testing Docker Client with Function Objects")
    print(rule)

    # Function objects are handed over as-is; the client is expected to
    # convert them before sending the request.
    hook_functions = {
        "on_page_context_created": auth_hook,
        "before_retrieve_html": scroll_hook,
    }
    target_urls = ["https://httpbin.org/html"]

    async with Crawl4aiDockerClient(base_url="http://localhost:11234", verbose=True) as client:
        result = await client.crawl(
            target_urls,
            hooks=hook_functions,
            hooks_timeout=30,
        )
        print(f"\n✓ Crawl successful: {result.success}")
        print(f"✓ URL: {result.url}")

    print("\n✓ Docker client accepts function objects directly")
    print("✓ Automatic conversion happens internally")
    print("✓ No manual string formatting needed!")
|
||||||
|
|
||||||
|
|
||||||
|
async def test_docker_client_with_strings():
    """Crawl through the Docker client using hooks already converted to strings.

    Connects to ``http://localhost:11234``, so a Crawl4AI Docker server must
    be listening there. Prints the crawl's success flag.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Testing Docker Client with String Hooks")
    print(rule)

    # Convert up front, then hand the resulting strings to the client.
    converted_hooks = hooks_to_string(
        {
            "on_page_context_created": viewport_hook,
            "before_retrieve_html": scroll_hook,
        }
    )
    target_urls = ["https://httpbin.org/html"]

    async with Crawl4aiDockerClient(base_url="http://localhost:11234", verbose=True) as client:
        result = await client.crawl(
            target_urls,
            hooks=converted_hooks,
            hooks_timeout=30,
        )
        print(f"\n✓ Crawl successful: {result.success}")

    print("\n✓ Docker client also accepts pre-converted strings")
    print("✓ Backward compatible with existing code")
|
||||||
|
|
||||||
|
|
||||||
|
async def show_usage_patterns():
    """Print three example snippets showing how hooks can be supplied.

    The patterns are: passing a function directly, converting with
    ``hooks_to_string`` first, and writing the hook source as a string by
    hand. Output goes to stdout only.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    patterns = (
        (
            "\n1. Direct function usage (simplest):",
            """
async def my_hook(page, context, **kwargs):
    await page.set_viewport_size({"width": 1920, "height": 1080})
    return page

result = await client.crawl(
    ["https://example.com"],
    hooks={"on_page_context_created": my_hook}
)
""",
        ),
        (
            "\n2. Convert then use:",
            """
hooks_dict = {"on_page_context_created": my_hook}
hooks_string = hooks_to_string(hooks_dict)

result = await client.crawl(
    ["https://example.com"],
    hooks=hooks_string
)
""",
        ),
        (
            "\n3. Manual string (backward compatible):",
            """
hooks_string = {
    "on_page_context_created": '''
async def hook(page, context, **kwargs):
    await page.set_viewport_size({"width": 1920, "height": 1080})
    return page
'''
}

result = await client.crawl(
    ["https://example.com"],
    hooks=hooks_string
)
""",
        ),
    )

    print("\n" + heavy_rule)
    print("Usage Patterns")
    print(heavy_rule)

    for heading, snippet in patterns:
        print(heading)
        print(light_rule)
        print(snippet)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Print the suite banner and run the string-hooks Docker client test.

    Only ``test_docker_client_with_strings`` is invoked at present; the other
    steps and the closing summary are left disabled below.
    """
    print("\n🚀 Crawl4AI Hooks Utility Test Suite\n")

    # Disabled steps (re-enable as needed):
    # await test_hooks_utility()
    # await test_docker_client_with_functions()
    # await show_usage_patterns()

    # Needs a running Crawl4AI Docker server.
    await test_docker_client_with_strings()

    # Disabled closing summary:
    # print("\n" + "=" * 60)
    # print("✓ All tests completed successfully!")
    # print("=" * 60)
    # print("\nKey Benefits:")
    # print(" • Write hooks as regular Python functions")
    # print(" • IDE support with autocomplete and type checking")
    # print(" • Automatic conversion to API format")
    # print(" • Backward compatible with string hooks")
    # print(" • Same utility used everywhere")
    # print("\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
305
tests/test_webhook_feature.sh
Executable file
305
tests/test_webhook_feature.sh
Executable file
@@ -0,0 +1,305 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
# Webhook Feature Test Script
|
||||||
|
#
|
||||||
|
# This script tests the webhook feature implementation by:
|
||||||
|
# 1. Switching to the webhook feature branch
|
||||||
|
# 2. Installing dependencies
|
||||||
|
# 3. Starting the server
|
||||||
|
# 4. Running webhook tests
|
||||||
|
# 5. Cleaning up and returning to original branch
|
||||||
|
#
|
||||||
|
# Usage: ./test_webhook_feature.sh
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
set -e # Exit on error
|
||||||
|
|
||||||
|
# Colors for output
|
||||||
|
RED='\033[0;31m'
|
||||||
|
GREEN='\033[0;32m'
|
||||||
|
YELLOW='\033[1;33m'
|
||||||
|
BLUE='\033[0;34m'
|
||||||
|
NC='\033[0m' # No Color
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
BRANCH_NAME="claude/implement-webhook-crawl-feature-011CULZY1Jy8N5MUkZqXkRVp"
|
||||||
|
VENV_PATH="venv"
|
||||||
|
SERVER_PORT=11235
|
||||||
|
WEBHOOK_PORT=8080
|
||||||
|
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
|
||||||
|
# PID files for cleanup
|
||||||
|
REDIS_PID=""
|
||||||
|
SERVER_PID=""
|
||||||
|
WEBHOOK_PID=""
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
# Utility Functions
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
# Print an informational message ($1) behind a blue [INFO] tag.
log_info() {
    local message=$1
    echo -e "${BLUE}[INFO]${NC} ${message}"
}
|
||||||
|
|
||||||
|
# Print a success message ($1) behind a green [SUCCESS] tag.
log_success() {
    local message=$1
    echo -e "${GREEN}[SUCCESS]${NC} ${message}"
}
|
||||||
|
|
||||||
|
# Print a warning message ($1) behind a yellow [WARNING] tag.
log_warning() {
    local message=$1
    echo -e "${YELLOW}[WARNING]${NC} ${message}"
}
|
||||||
|
|
||||||
|
# Print an error message ($1) behind a red [ERROR] tag.
log_error() {
    local message=$1
    echo -e "${RED}[ERROR]${NC} ${message}"
}
|
||||||
|
|
||||||
|
# Stop every still-running process in a whitespace-separated PID list.
#   $1 - label used in the log message
#   $2 - PID list; may be empty, and may hold several PIDs (pgrep output)
_stop_pids() {
    local label=$1 pid
    # Intentionally unquoted: split the list into individual PIDs.
    for pid in $2; do
        if kill -0 "$pid" 2>/dev/null; then
            log_info "Stopping $label (PID: $pid)..."
            kill "$pid" 2>/dev/null || true
        fi
    done
}

# Tear down everything the script started and restore the original branch.
# Registered via `trap` for EXIT, INT and TERM. Every step is best-effort
# (`|| true`) so that one failure does not prevent the remaining steps.
cleanup() {
    # After INT/TERM the handler would otherwise run a second time when the
    # shell later exits, so make repeated invocations a no-op.
    if [ -n "${CLEANUP_DONE:-}" ]; then
        return 0
    fi
    CLEANUP_DONE=1

    log_info "Starting cleanup..."

    _stop_pids "webhook receiver" "$WEBHOOK_PID"
    _stop_pids "server" "$SERVER_PID"
    _stop_pids "Redis" "$REDIS_PID"

    # Also kill by port if PIDs didn't work.
    # NOTE(review): this SIGKILLs whatever listens on these ports, including
    # a Redis on 6379 that this script did not start — confirm that is
    # acceptable on shared machines.
    lsof -ti:"$SERVER_PORT" | xargs kill -9 2>/dev/null || true
    lsof -ti:"$WEBHOOK_PORT" | xargs kill -9 2>/dev/null || true
    lsof -ti:6379 | xargs kill -9 2>/dev/null || true

    # Return to original branch (unset if the script failed before Step 1).
    if [ -n "${ORIGINAL_BRANCH:-}" ]; then
        log_info "Switching back to branch: $ORIGINAL_BRANCH"
        git checkout "$ORIGINAL_BRANCH" 2>/dev/null || true
    fi

    log_success "Cleanup complete"
}
|
||||||
|
|
||||||
|
# Set trap to cleanup on exit
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
# Main Script
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
log_info "Starting webhook feature test script"
log_info "Project root: $PROJECT_ROOT"

cd "$PROJECT_ROOT"

# Step 1: Save current branch and fetch PR
log_info "Step 1: Fetching PR branch..."
ORIGINAL_BRANCH=$(git rev-parse --abbrev-ref HEAD)
if [ "$ORIGINAL_BRANCH" = "HEAD" ]; then
    # Detached HEAD: `--abbrev-ref` prints the literal "HEAD", which cleanup
    # could not check out again. Remember the commit hash instead.
    ORIGINAL_BRANCH=$(git rev-parse HEAD)
fi
log_info "Current branch: $ORIGINAL_BRANCH"

git fetch origin "$BRANCH_NAME"
log_success "Branch fetched"

# Step 2: Switch to new branch
log_info "Step 2: Switching to branch: $BRANCH_NAME"
git checkout "$BRANCH_NAME"
log_success "Switched to webhook feature branch"

# Step 3: Activate virtual environment
log_info "Step 3: Activating virtual environment..."
if [ ! -d "$VENV_PATH" ]; then
    # Recoverable (the venv is created right below), so warn rather than error.
    log_warning "Virtual environment not found at $VENV_PATH"
    log_info "Creating virtual environment..."
    python3 -m venv "$VENV_PATH"
fi

source "$VENV_PATH/bin/activate"
log_success "Virtual environment activated: $(which python)"

# Step 4: Install server dependencies
log_info "Step 4: Installing server dependencies..."
pip install -q -r deploy/docker/requirements.txt
log_success "Dependencies installed"

# Check if Redis is available; try the platform package manager if it is not.
log_info "Checking Redis availability..."
if ! command -v redis-server &> /dev/null; then
    log_warning "Redis not found, attempting to install..."
    if command -v apt-get &> /dev/null; then
        sudo apt-get update && sudo apt-get install -y redis-server
    elif command -v brew &> /dev/null; then
        brew install redis
    else
        log_error "Cannot install Redis automatically. Please install Redis manually."
        exit 1
    fi
fi
|
||||||
|
|
||||||
|
# Step 5: Start Redis in background
log_info "Step 5a: Starting Redis..."
redis-server --port 6379 --daemonize yes
sleep 2
# `|| true`: under `set -e` a pgrep miss would otherwise abort the script
# right here without any message. Report the failure explicitly instead.
REDIS_PID=$(pgrep redis-server || true)
if [ -z "$REDIS_PID" ]; then
    log_error "Redis does not appear to be running after start-up"
    exit 1
fi
log_success "Redis started (PID: $REDIS_PID)"

# Step 5b: Start server in background
log_info "Step 5b: Starting server on port $SERVER_PORT..."
cd deploy/docker

# Start server in background; its output goes to a log file shown on failure.
python3 -m uvicorn server:app --host 0.0.0.0 --port "$SERVER_PORT" > /tmp/crawl4ai_server.log 2>&1 &
SERVER_PID=$!
cd "$PROJECT_ROOT"

log_info "Server started (PID: $SERVER_PID)"

# Wait for server to be ready (one health probe per second, 30 attempts)
log_info "Waiting for server to be ready..."
for i in {1..30}; do
    if curl -s "http://localhost:$SERVER_PORT/health" > /dev/null 2>&1; then
        log_success "Server is ready!"
        break
    fi
    # Fail fast when the server process has already exited; there is no
    # point polling for the rest of the 30 seconds.
    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo ""
        log_error "Server process exited before becoming ready"
        log_info "Server logs:"
        tail -50 /tmp/crawl4ai_server.log
        exit 1
    fi
    if [ "$i" -eq 30 ]; then
        log_error "Server failed to start within 30 seconds"
        log_info "Server logs:"
        tail -50 /tmp/crawl4ai_server.log
        exit 1
    fi
    echo -n "."
    sleep 1
done
echo ""
|
||||||
|
|
||||||
|
# Step 6: Create and run webhook test
log_info "Step 6: Creating webhook test script..."

# The delimiter is quoted, so the shell performs no expansion inside the
# Python source below; its URLs are therefore hard-coded and must match
# SERVER_PORT / WEBHOOK_PORT above.
cat > /tmp/test_webhook.py << 'PYTHON_SCRIPT'
import requests
import json
import sys
import time
from flask import Flask, request, jsonify
from threading import Thread, Event

# Configuration
CRAWL4AI_BASE_URL = "http://localhost:11235"
WEBHOOK_BASE_URL = "http://localhost:8080"

# Flask app for webhook receiver
app = Flask(__name__)
webhook_received = Event()
webhook_data = {}

@app.route('/webhook', methods=['POST'])
def handle_webhook():
    """Store the posted JSON and wake the waiting main thread."""
    global webhook_data
    webhook_data = request.json
    webhook_received.set()
    print(f"\n✅ Webhook received: {json.dumps(webhook_data, indent=2)}")
    return jsonify({"status": "received"}), 200

def start_webhook_server():
    """Serve the receiver on port 8080 (blocks; run in a daemon thread)."""
    app.run(host='0.0.0.0', port=8080, debug=False, use_reloader=False)

# Start webhook server in background
webhook_thread = Thread(target=start_webhook_server, daemon=True)
webhook_thread.start()
time.sleep(2)

print("🚀 Submitting crawl job with webhook...")

# Submit job with webhook
payload = {
    "urls": ["https://example.com"],
    "browser_config": {"headless": True},
    "crawler_config": {"cache_mode": "bypass"},
    "webhook_config": {
        "webhook_url": f"{WEBHOOK_BASE_URL}/webhook",
        "webhook_data_in_payload": True
    }
}

# Timeouts keep a stalled server from hanging the whole test run.
response = requests.post(
    f"{CRAWL4AI_BASE_URL}/crawl/job",
    json=payload,
    headers={"Content-Type": "application/json"},
    timeout=30
)

if not response.ok:
    print(f"❌ Failed to submit job: {response.text}")
    sys.exit(1)

task_id = response.json()['task_id']
print(f"✅ Job submitted successfully, task_id: {task_id}")

# Wait for webhook (with timeout)
print("⏳ Waiting for webhook notification...")
if webhook_received.wait(timeout=60):
    print(f"✅ Webhook received!")
    print(f" Task ID: {webhook_data.get('task_id')}")
    print(f" Status: {webhook_data.get('status')}")
    print(f" URLs: {webhook_data.get('urls')}")

    if webhook_data.get('status') == 'completed':
        if 'data' in webhook_data:
            print(f" ✅ Data included in webhook payload")
            results = webhook_data['data'].get('results', [])
            if results:
                print(f" 📄 Crawled {len(results)} URL(s)")
                for result in results:
                    print(f" - {result.get('url')}: {len(result.get('markdown', ''))} chars")
        print("\n🎉 Webhook test PASSED!")
        sys.exit(0)
    else:
        print(f" ❌ Job failed: {webhook_data.get('error')}")
        sys.exit(1)
else:
    print("❌ Webhook not received within 60 seconds")
    # Try polling as fallback (diagnostic only: the test still fails below)
    print("⏳ Trying to poll job status...")
    for i in range(10):
        status_response = requests.get(f"{CRAWL4AI_BASE_URL}/crawl/job/{task_id}", timeout=10)
        if status_response.ok:
            status = status_response.json()
            print(f" Status: {status.get('status')}")
            if status.get('status') in ['completed', 'failed']:
                break
        time.sleep(2)
    sys.exit(1)
PYTHON_SCRIPT
|
||||||
|
|
||||||
|
# Install Flask for webhook receiver
pip install -q flask

# Run the webhook test
log_info "Running webhook test..."
python3 /tmp/test_webhook.py &
WEBHOOK_PID=$!

# Wait for test to complete.
# The script runs under `set -e`: a bare `wait` returning non-zero would
# abort immediately, before the exit code is stored and before the failure
# branch below can print the server logs. Capturing the status through `||`
# keeps a failing test from ending the script here.
TEST_EXIT_CODE=0
wait "$WEBHOOK_PID" || TEST_EXIT_CODE=$?

# Step 7: Verify results
log_info "Step 7: Verifying test results..."
if [ "$TEST_EXIT_CODE" -eq 0 ]; then
    log_success "✅ Webhook test PASSED!"
else
    log_error "❌ Webhook test FAILED (exit code: $TEST_EXIT_CODE)"
    log_info "Server logs:"
    tail -100 /tmp/crawl4ai_server.log
    exit 1
fi

# Step 8: Cleanup happens automatically via trap
log_success "All tests completed successfully! 🎉"
log_info "Cleanup will happen automatically..."
|
||||||
Reference in New Issue
Block a user