Files
crawl4ai/deploy/docker/routers/scripts.py
AHMET YILMAZ 201843a204 Add comprehensive tests for anti-bot strategies and extended features
- Implemented `test_adapter_verification.py` to verify correct usage of browser adapters.
- Created `test_all_features.py` for a comprehensive suite covering URL seeding, adaptive crawling, browser adapters, proxy rotation, and dispatchers.
- Developed `test_anti_bot_strategy.py` to validate the functionality of various anti-bot strategies.
- Added `test_antibot_simple.py` for simple testing of anti-bot strategies using async web crawling.
- Introduced `test_bot_detection.py` to assess adapter performance against bot detection mechanisms.
- Compiled `test_final_summary.py` to provide a detailed summary of all tests and their results.
2025-10-07 18:51:13 +08:00

307 lines
8.6 KiB
Python

from typing import Optional
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from schemas import C4AScriptPayload
from crawl4ai.script import (
CompilationResult,
ValidationResult,
# ErrorDetail
)
# Import all necessary components from the crawl4ai library
# C4A Script Language Support
from crawl4ai.script import (
compile as c4a_compile,
)
from crawl4ai.script import (
validate as c4a_validate,
)
# --- Router that groups every C4A-Script endpoint under the /c4a prefix ---
router = APIRouter(prefix="/c4a", tags=["c4a Scripts"])
# --- C4A-Script Endpoints ---
@router.post(
    "/validate",
    summary="Validate C4A-Script",
    description="Validate the syntax of a C4A-Script without compiling it.",
    response_description="Validation result with errors if any",
    response_model=ValidationResult,
)
async def validate_c4a_script_endpoint(payload: C4AScriptPayload):
    """
    Check a C4A-Script for syntax problems without compiling it.

    The script text is taken from ``payload.script`` and handed to the
    C4A validator; whatever result object the validator produces is
    returned unchanged and serialized through ``ValidationResult``.

    **Request Body:**
    ```json
    {
        "script": "NAVIGATE https://example.com\\nWAIT 2\\nCLICK button.submit"
    }
    ```

    **Response (Valid)** (illustrative; the exact fields are defined by
    ``ValidationResult``):
    ```json
    {
        "success": true,
        "errors": []
    }
    ```

    **Response (Invalid):**
    ```json
    {
        "success": false,
        "errors": [
            {
                "line": 3,
                "message": "Unknown command: CLCK",
                "type": "SyntaxError"
            }
        ]
    }
    ```

    **Usage:**
    ```python
    response = requests.post(
        "http://localhost:11235/c4a/validate",
        headers={"Authorization": f"Bearer {token}"},
        json={
            "script": "NAVIGATE https://example.com\\nWAIT 2"
        }
    )
    result = response.json()
    if result["success"]:
        print("Script is valid!")
    else:
        for error in result["errors"]:
            print(f"Line {error['line']}: {error['message']}")
    ```

    **Notes:**
    - Validates syntax only; the script is never executed
    - Problems are reported inside the returned result, not as an HTTP error
    - Use before compiling to check for issues
    """
    # The validator is expected to report problems in its result object
    # rather than raise, so no exception handling is needed here.
    return c4a_validate(payload.script)
@router.post(
    "/compile",
    summary="Compile C4A-Script",
    description="Compile a C4A-Script into executable JavaScript code.",
    response_description="Compiled JavaScript code or compilation errors",
    response_model=CompilationResult,
)
async def compile_c4a_script_endpoint(payload: C4AScriptPayload):
    """
    Compile a C4A-Script into executable JavaScript.

    Transforms high-level C4A-Script commands into JavaScript that can be
    executed in a browser context. On success the compiler's result object
    is returned (HTTP 200). On failure an HTTP 400 is raised whose
    ``detail`` field carries ``compilation_result.to_dict()``.

    **Request Body:**
    ```json
    {
        "script": "NAVIGATE https://example.com\\nWAIT 2\\nCLICK button.submit"
    }
    ```

    **Response (Success, HTTP 200)** (illustrative; the exact fields are
    defined by ``CompilationResult``):
    ```json
    {
        "success": true,
        "javascript": "await page.goto('https://example.com');\\nawait page.waitForTimeout(2000);\\nawait page.click('button.submit');",
        "errors": []
    }
    ```

    **Response (Error, HTTP 400):** the failed result is wrapped in the
    standard ``detail`` envelope of an ``HTTPException``:
    ```json
    {
        "detail": {
            "success": false,
            "javascript": null,
            "errors": [
                {
                    "line": 2,
                    "message": "Invalid WAIT duration",
                    "type": "CompilationError"
                }
            ]
        }
    }
    ```

    **Usage:**
    ```python
    response = requests.post(
        "http://localhost:11235/c4a/compile",
        headers={"Authorization": f"Bearer {token}"},
        json={
            "script": "NAVIGATE https://example.com\\nCLICK .login-button"
        }
    )
    body = response.json()
    if response.status_code == 200:
        print("Compiled JavaScript:")
        print(body["javascript"])
    else:
        # Failed compilations arrive as HTTP 400 with the result in "detail"
        print("Compilation failed:", body["detail"]["errors"])
    ```

    **C4A-Script Commands:**
    - `NAVIGATE <url>` - Navigate to URL
    - `WAIT <seconds>` - Wait for specified time
    - `CLICK <selector>` - Click element
    - `TYPE <selector> <text>` - Type text into element
    - `SCROLL <direction>` - Scroll page
    - And many more...

    **Notes:**
    - Returns HTTP 400 if compilation fails
    - JavaScript can be used with /execute_js endpoint
    - Simplifies browser automation scripting

    Raises:
        HTTPException: status 400 when the compiler reports failure.
    """
    # The compiler reports failure through the result object instead of
    # raising, so the success flag has to be checked explicitly.
    compilation_result = c4a_compile(payload.script)
    if not compilation_result.success:
        # Surface a failed compilation as a client error so callers can
        # tell it apart from a successful response by status code alone.
        raise HTTPException(
            status_code=400,
            detail=compilation_result.to_dict(),
        )
    return compilation_result
@router.post(
    "/compile-file",
    summary="Compile C4A-Script from File",
    description="Compile a C4A-Script from an uploaded file or form string.",
    response_description="Compiled JavaScript code or compilation errors",
    response_model=CompilationResult,
)
async def compile_c4a_script_file_endpoint(
    file: Optional[UploadFile] = File(None), script: Optional[str] = Form(None)
):
    """
    Compile a C4A-Script from file upload or form data.

    Accepts either a file upload or a string form field, never both. The
    chosen input is compiled exactly like the JSON ``/compile`` endpoint.

    **Parameters:**
    - `file`: C4A-Script file upload (multipart/form-data)
    - `script`: C4A-Script content as string (form field)

    **Note:** Provide either file OR script, not both. An empty `script`
    value counts as not provided.

    **Request (File Upload):**
    ```bash
    curl -X POST "http://localhost:11235/c4a/compile-file" \\
      -H "Authorization: Bearer YOUR_TOKEN" \\
      -F "file=@myscript.c4a"
    ```

    **Request (Form String):**
    ```bash
    curl -X POST "http://localhost:11235/c4a/compile-file" \\
      -H "Authorization: Bearer YOUR_TOKEN" \\
      -F "script=NAVIGATE https://example.com"
    ```

    **Response (Success, HTTP 200)** (illustrative; the exact fields are
    defined by ``CompilationResult``):
    ```json
    {
        "success": true,
        "javascript": "await page.goto('https://example.com');",
        "errors": []
    }
    ```

    **Response (Error, HTTP 400):** every failure is reported inside the
    ``detail`` envelope, e.g. for a missing input:
    ```json
    {
        "detail": {"error": "Either 'file' or 'script' parameter must be provided"}
    }
    ```
    A failed compilation puts the compiler's result dict in ``detail``.

    **Usage (Python with file):**
    ```python
    with open('script.c4a', 'rb') as f:
        response = requests.post(
            "http://localhost:11235/c4a/compile-file",
            headers={"Authorization": f"Bearer {token}"},
            files={"file": f}
        )
    body = response.json()
    if response.status_code == 200:
        print(body["javascript"])
    else:
        print("Request failed:", body["detail"])
    ```

    **Usage (Python with string):**
    ```python
    response = requests.post(
        "http://localhost:11235/c4a/compile-file",
        headers={"Authorization": f"Bearer {token}"},
        data={"script": "NAVIGATE https://example.com"}
    )
    body = response.json()
    if response.status_code == 200:
        print(body["javascript"])
    else:
        print("Request failed:", body["detail"])
    ```

    **Notes:**
    - File must be UTF-8 encoded text (a leading byte-order mark is ignored)
    - Use for batch script compilation
    - Returns HTTP 400 if both or neither parameter provided
    - Returns HTTP 400 if the file cannot be read or decoded
    - Returns HTTP 400 if compilation fails

    Raises:
        HTTPException: status 400 for a missing or ambiguous input, an
            unreadable or non-UTF-8 file, or a failed compilation.
    """
    # Reject requests that supply no input at all.
    if not file and not script:
        raise HTTPException(
            status_code=400,
            detail={"error": "Either 'file' or 'script' parameter must be provided"},
        )

    # Supplying both inputs is ambiguous, so it is rejected rather than
    # silently preferring one of them.
    if file and script:
        raise HTTPException(
            status_code=400,
            detail={"error": "Please provide either 'file' or 'script', not both"},
        )

    if file:
        # Handle file upload
        try:
            file_content = await file.read()
            # "utf-8-sig" decodes plain UTF-8 identically but drops a leading
            # BOM, which would otherwise end up glued to the first command.
            script_content = file_content.decode("utf-8-sig")
        except UnicodeDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail={"error": "File must be a valid UTF-8 text file"},
            ) from exc
        except Exception as e:
            raise HTTPException(
                status_code=400, detail={"error": f"Error reading file: {str(e)}"}
            ) from e
    else:
        # Only the form string remains; the checks above guarantee it is set.
        script_content = script

    # The compiler reports failure through the result object instead of
    # raising, so the success flag has to be checked explicitly.
    compilation_result = c4a_compile(script_content)
    if not compilation_result.success:
        # Surface a failed compilation as a client error so callers can
        # tell it apart from a successful response by status code alone.
        raise HTTPException(
            status_code=400,
            detail=compilation_result.to_dict(),
        )
    return compilation_result