feat(script): add new scripting capabilities and documentation

This commit introduces a comprehensive set of new scripts and examples to enhance the scripting capabilities of the crawl4ai project. The changes include the addition of several Python scripts for compiling and executing scripts, as well as a variety of example scripts demonstrating different functionalities such as login flows, data extraction, and multi-step workflows. Additionally, detailed documentation has been created to guide users on how to utilize these new features effectively.

The following significant modifications were made:
- Added core scripting files: , , and .
- Created a new documentation file  to provide an overview of the new features.
- Introduced multiple example scripts in the  directory to showcase various use cases.
- Updated  and  to integrate the new functionalities.
- Added font assets for improved documentation presentation.

These changes significantly expand the functionality of the crawl4ai project, allowing users to create more complex and varied scripts with ease.
This commit is contained in:
UncleCode
2025-06-06 17:16:53 +08:00
parent e731596315
commit 3f6f2e998c
29 changed files with 2536 additions and 0 deletions

View File

@@ -69,6 +69,16 @@ from .deep_crawling import (
# NEW: Import AsyncUrlSeeder
from .async_url_seeder import AsyncUrlSeeder
# C4A Script Language Support
from .script import (
compile as c4a_compile,
validate as c4a_validate,
compile_file as c4a_compile_file,
CompilationResult,
ValidationResult,
ErrorDetail
)
from .utils import (
start_colab_display_server,
setup_colab_environment
@@ -139,6 +149,13 @@ __all__ = [
"ProxyConfig",
"start_colab_display_server",
"setup_colab_environment",
# C4A Script additions
"c4a_compile",
"c4a_validate",
"c4a_compile_file",
"CompilationResult",
"ValidationResult",
"ErrorDetail",
]

View File

@@ -911,6 +911,7 @@ class CrawlerRunConfig():
semaphore_count: int = 5,
# Page Interaction Parameters
js_code: Union[str, List[str]] = None,
c4a_script: Union[str, List[str]] = None,
js_only: bool = False,
ignore_body_visibility: bool = True,
scan_full_page: bool = False,
@@ -1009,6 +1010,7 @@ class CrawlerRunConfig():
# Page Interaction Parameters
self.js_code = js_code
self.c4a_script = c4a_script
self.js_only = js_only
self.ignore_body_visibility = ignore_body_visibility
self.scan_full_page = scan_full_page
@@ -1084,6 +1086,59 @@ class CrawlerRunConfig():
# Experimental Parameters
self.experimental = experimental or {}
# Compile C4A scripts if provided
if self.c4a_script and not self.js_code:
self._compile_c4a_script()
def _compile_c4a_script(self):
"""Compile C4A script to JavaScript"""
try:
# Try importing the compiler
try:
from .script import compile
except ImportError:
from crawl4ai.script import compile
# Handle both string and list inputs
if isinstance(self.c4a_script, str):
scripts = [self.c4a_script]
else:
scripts = self.c4a_script
# Compile each script
compiled_js = []
for i, script in enumerate(scripts):
result = compile(script)
if result.success:
compiled_js.extend(result.js_code)
else:
# Format error message following existing patterns
error = result.first_error
error_msg = (
f"C4A Script compilation error (script {i+1}):\n"
f" Line {error.line}, Column {error.column}: {error.message}\n"
f" Code: {error.source_line}"
)
if error.suggestions:
error_msg += f"\n Suggestion: {error.suggestions[0].message}"
raise ValueError(error_msg)
self.js_code = compiled_js
except ImportError:
raise ValueError(
"C4A script compiler not available. "
"Please ensure crawl4ai.script module is properly installed."
)
except Exception as e:
# Re-raise with context
if "compilation error" not in str(e).lower():
raise ValueError(f"Failed to compile C4A script: {str(e)}")
raise
def __getattr__(self, name):

View File

@@ -0,0 +1,35 @@
"""
C4A-Script: A domain-specific language for web automation in Crawl4AI
"""
from .c4a_compile import C4ACompiler, compile, validate, compile_file
from .c4a_result import (
CompilationResult,
ValidationResult,
ErrorDetail,
WarningDetail,
ErrorType,
Severity,
Suggestion
)
__all__ = [
# Main compiler
"C4ACompiler",
# Convenience functions
"compile",
"validate",
"compile_file",
# Result types
"CompilationResult",
"ValidationResult",
"ErrorDetail",
"WarningDetail",
# Enums
"ErrorType",
"Severity",
"Suggestion"
]

View File

@@ -0,0 +1,328 @@
"""
Clean C4A-Script API with Result pattern
No exceptions - always returns results
"""
from __future__ import annotations
import pathlib
import re
from typing import Union, List, Optional
from .c4a_result import (
CompilationResult, ValidationResult, ErrorDetail, WarningDetail,
ErrorType, Severity, Suggestion
)
from .c4ai_script import Compiler
from lark.exceptions import UnexpectedToken, UnexpectedCharacters, VisitError
class C4ACompiler:
"""Main compiler with result-based API"""
# Error code mapping
ERROR_CODES = {
"missing_then": "E001",
"missing_paren": "E002",
"missing_comma": "E003",
"missing_endproc": "E004",
"undefined_proc": "E005",
"missing_backticks": "E006",
"invalid_command": "E007",
"syntax_error": "E999"
}
@classmethod
def compile(cls, script: Union[str, List[str]], root: Optional[pathlib.Path] = None) -> CompilationResult:
"""
Compile C4A-Script to JavaScript
Args:
script: C4A-Script as string or list of lines
root: Root directory for includes
Returns:
CompilationResult with success status and JS code or errors
"""
# Normalize input
if isinstance(script, list):
script_text = '\n'.join(script)
script_lines = script
else:
script_text = script
script_lines = script.split('\n')
try:
# Try compilation
compiler = Compiler(root)
js_code = compiler.compile(script_text)
# Success!
result = CompilationResult(
success=True,
js_code=js_code,
metadata={
"lineCount": len(script_lines),
"statementCount": len(js_code)
}
)
# Add any warnings (future feature)
# result.warnings = cls._check_warnings(script_text)
return result
except Exception as e:
# Convert exception to ErrorDetail
error = cls._exception_to_error(e, script_lines)
return CompilationResult(
success=False,
errors=[error],
metadata={
"lineCount": len(script_lines)
}
)
@classmethod
def validate(cls, script: Union[str, List[str]]) -> ValidationResult:
"""
Validate script syntax without generating code
Args:
script: C4A-Script to validate
Returns:
ValidationResult with validity status and any errors
"""
result = cls.compile(script)
return ValidationResult(
valid=result.success,
errors=result.errors,
warnings=result.warnings
)
@classmethod
def compile_file(cls, path: Union[str, pathlib.Path]) -> CompilationResult:
"""
Compile a C4A-Script file
Args:
path: Path to the file
Returns:
CompilationResult
"""
path = pathlib.Path(path)
if not path.exists():
error = ErrorDetail(
type=ErrorType.RUNTIME,
code="E100",
severity=Severity.ERROR,
message=f"File not found: {path}",
line=0,
column=0,
source_line=""
)
return CompilationResult(success=False, errors=[error])
try:
script = path.read_text()
return cls.compile(script, root=path.parent)
except Exception as e:
error = ErrorDetail(
type=ErrorType.RUNTIME,
code="E101",
severity=Severity.ERROR,
message=f"Error reading file: {str(e)}",
line=0,
column=0,
source_line=""
)
return CompilationResult(success=False, errors=[error])
@classmethod
def _exception_to_error(cls, exc: Exception, script_lines: List[str]) -> ErrorDetail:
"""Convert an exception to ErrorDetail"""
if isinstance(exc, UnexpectedToken):
return cls._handle_unexpected_token(exc, script_lines)
elif isinstance(exc, UnexpectedCharacters):
return cls._handle_unexpected_chars(exc, script_lines)
elif isinstance(exc, ValueError):
return cls._handle_value_error(exc, script_lines)
else:
# Generic error
return ErrorDetail(
type=ErrorType.SYNTAX,
code=cls.ERROR_CODES["syntax_error"],
severity=Severity.ERROR,
message=str(exc),
line=1,
column=1,
source_line=script_lines[0] if script_lines else ""
)
@classmethod
def _handle_unexpected_token(cls, exc: UnexpectedToken, script_lines: List[str]) -> ErrorDetail:
"""Handle UnexpectedToken errors"""
line = exc.line
column = exc.column
# Get context lines
source_line = script_lines[line - 1] if 0 < line <= len(script_lines) else ""
line_before = script_lines[line - 2] if line > 1 and line <= len(script_lines) + 1 else None
line_after = script_lines[line] if 0 < line < len(script_lines) else None
# Determine error type and suggestions
if exc.token.type == 'CLICK' and 'THEN' in str(exc.expected):
code = cls.ERROR_CODES["missing_then"]
message = "Missing 'THEN' keyword after IF condition"
suggestions = [
Suggestion(
"Add 'THEN' after the condition",
source_line.replace("CLICK", "THEN CLICK") if source_line else None
)
]
elif exc.token.type == '$END':
code = cls.ERROR_CODES["missing_endproc"]
message = "Unexpected end of script"
suggestions = [
Suggestion("Check for missing ENDPROC"),
Suggestion("Ensure all procedures are properly closed")
]
elif 'RPAR' in str(exc.expected):
code = cls.ERROR_CODES["missing_paren"]
message = "Missing closing parenthesis ')'"
suggestions = [
Suggestion("Add closing parenthesis at the end of the condition")
]
elif 'COMMA' in str(exc.expected):
code = cls.ERROR_CODES["missing_comma"]
message = "Missing comma ',' in command"
suggestions = [
Suggestion("Add comma between arguments")
]
else:
# Check if this might be missing backticks
if exc.token.type == 'NAME' and 'BACKTICK_STRING' in str(exc.expected):
code = cls.ERROR_CODES["missing_backticks"]
message = "Selector must be wrapped in backticks"
suggestions = [
Suggestion(
"Wrap the selector in backticks",
f"`{exc.token.value}`"
)
]
else:
code = cls.ERROR_CODES["syntax_error"]
message = f"Unexpected '{exc.token.value}'"
if exc.expected:
expected_list = [str(e) for e in exc.expected if not str(e).startswith('_')][:3]
if expected_list:
message += f". Expected: {', '.join(expected_list)}"
suggestions = []
return ErrorDetail(
type=ErrorType.SYNTAX,
code=code,
severity=Severity.ERROR,
message=message,
line=line,
column=column,
source_line=source_line,
line_before=line_before,
line_after=line_after,
suggestions=suggestions
)
@classmethod
def _handle_unexpected_chars(cls, exc: UnexpectedCharacters, script_lines: List[str]) -> ErrorDetail:
"""Handle UnexpectedCharacters errors"""
line = exc.line
column = exc.column
source_line = script_lines[line - 1] if 0 < line <= len(script_lines) else ""
# Check for missing backticks
if "CLICK" in source_line and column > source_line.find("CLICK"):
code = cls.ERROR_CODES["missing_backticks"]
message = "Selector must be wrapped in backticks"
suggestions = [
Suggestion(
"Wrap the selector in backticks",
re.sub(r'CLICK\s+([^\s]+)', r'CLICK `\1`', source_line)
)
]
else:
code = cls.ERROR_CODES["syntax_error"]
message = f"Invalid character at position {column}"
suggestions = []
return ErrorDetail(
type=ErrorType.SYNTAX,
code=code,
severity=Severity.ERROR,
message=message,
line=line,
column=column,
source_line=source_line,
suggestions=suggestions
)
@classmethod
def _handle_value_error(cls, exc: ValueError, script_lines: List[str]) -> ErrorDetail:
"""Handle ValueError (runtime errors)"""
message = str(exc)
# Check for undefined procedure
if "Unknown procedure" in message:
proc_match = re.search(r"'([^']+)'", message)
if proc_match:
proc_name = proc_match.group(1)
# Find the line with the procedure call
for i, line in enumerate(script_lines):
if proc_name in line and not line.strip().startswith('PROC'):
return ErrorDetail(
type=ErrorType.RUNTIME,
code=cls.ERROR_CODES["undefined_proc"],
severity=Severity.ERROR,
message=f"Undefined procedure '{proc_name}'",
line=i + 1,
column=line.find(proc_name) + 1,
source_line=line,
suggestions=[
Suggestion(
f"Define the procedure before using it",
f"PROC {proc_name}\n # commands here\nENDPROC"
)
]
)
# Generic runtime error
return ErrorDetail(
type=ErrorType.RUNTIME,
code="E999",
severity=Severity.ERROR,
message=message,
line=1,
column=1,
source_line=script_lines[0] if script_lines else ""
)
# Convenience functions for direct use
def compile(script: Union[str, List[str]], root: Optional[pathlib.Path] = None) -> CompilationResult:
"""Compile C4A-Script to JavaScript"""
return C4ACompiler.compile(script, root)
def validate(script: Union[str, List[str]]) -> ValidationResult:
"""Validate C4A-Script syntax"""
return C4ACompiler.validate(script)
def compile_file(path: Union[str, pathlib.Path]) -> CompilationResult:
"""Compile C4A-Script file"""
return C4ACompiler.compile_file(path)

View File

@@ -0,0 +1,219 @@
"""
Result classes for C4A-Script compilation
Clean API design with no exceptions
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Any, Optional
import json
class ErrorType(Enum):
SYNTAX = "syntax"
SEMANTIC = "semantic"
RUNTIME = "runtime"
class Severity(Enum):
ERROR = "error"
WARNING = "warning"
INFO = "info"
@dataclass
class Suggestion:
"""A suggestion for fixing an error"""
message: str
fix: Optional[str] = None
def to_dict(self) -> dict:
return {
"message": self.message,
"fix": self.fix
}
@dataclass
class ErrorDetail:
"""Detailed information about a compilation error"""
# Core info
type: ErrorType
code: str # E001, E002, etc.
severity: Severity
message: str
# Location
line: int
column: int
# Context
source_line: str
# Optional fields with defaults
end_line: Optional[int] = None
end_column: Optional[int] = None
line_before: Optional[str] = None
line_after: Optional[str] = None
# Help
suggestions: List[Suggestion] = field(default_factory=list)
documentation_url: Optional[str] = None
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization"""
return {
"type": self.type.value,
"code": self.code,
"severity": self.severity.value,
"message": self.message,
"location": {
"line": self.line,
"column": self.column,
"endLine": self.end_line,
"endColumn": self.end_column
},
"context": {
"sourceLine": self.source_line,
"lineBefore": self.line_before,
"lineAfter": self.line_after,
"marker": {
"start": self.column - 1,
"length": (self.end_column - self.column) if self.end_column else 1
}
},
"suggestions": [s.to_dict() for s in self.suggestions],
"documentationUrl": self.documentation_url
}
def to_json(self) -> str:
"""Convert to JSON string"""
return json.dumps(self.to_dict(), indent=2)
@property
def formatted_message(self) -> str:
"""Returns the nice text format for terminals"""
lines = []
lines.append(f"\n{'='*60}")
lines.append(f"{self.type.value.title()} Error [{self.code}]")
lines.append(f"{'='*60}")
lines.append(f"Location: Line {self.line}, Column {self.column}")
lines.append(f"Error: {self.message}")
if self.source_line:
marker = " " * (self.column - 1) + "^"
if self.end_column:
marker += "~" * (self.end_column - self.column - 1)
lines.append(f"\nCode:")
if self.line_before:
lines.append(f" {self.line - 1: >3} | {self.line_before}")
lines.append(f" {self.line: >3} | {self.source_line}")
lines.append(f" | {marker}")
if self.line_after:
lines.append(f" {self.line + 1: >3} | {self.line_after}")
if self.suggestions:
lines.append("\nSuggestions:")
for i, suggestion in enumerate(self.suggestions, 1):
lines.append(f" {i}. {suggestion.message}")
if suggestion.fix:
lines.append(f" Fix: {suggestion.fix}")
lines.append("="*60)
return "\n".join(lines)
@property
def simple_message(self) -> str:
"""Returns just the error message without formatting"""
return f"Line {self.line}: {self.message}"
@dataclass
class WarningDetail:
"""Information about a compilation warning"""
code: str
message: str
line: int
column: int
def to_dict(self) -> dict:
return {
"code": self.code,
"message": self.message,
"line": self.line,
"column": self.column
}
@dataclass
class CompilationResult:
"""Result of C4A-Script compilation"""
success: bool
js_code: Optional[List[str]] = None
errors: List[ErrorDetail] = field(default_factory=list)
warnings: List[WarningDetail] = field(default_factory=list)
metadata: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization"""
return {
"success": self.success,
"jsCode": self.js_code,
"errors": [e.to_dict() for e in self.errors],
"warnings": [w.to_dict() for w in self.warnings],
"metadata": self.metadata
}
def to_json(self) -> str:
"""Convert to JSON string"""
return json.dumps(self.to_dict(), indent=2)
@property
def has_errors(self) -> bool:
"""Check if there are any errors"""
return len(self.errors) > 0
@property
def has_warnings(self) -> bool:
"""Check if there are any warnings"""
return len(self.warnings) > 0
@property
def first_error(self) -> Optional[ErrorDetail]:
"""Get the first error if any"""
return self.errors[0] if self.errors else None
def __str__(self) -> str:
"""String representation for debugging"""
if self.success:
msg = f"✓ Compilation successful"
if self.js_code:
msg += f" - {len(self.js_code)} statements generated"
if self.warnings:
msg += f" ({len(self.warnings)} warnings)"
return msg
else:
return f"✗ Compilation failed - {len(self.errors)} error(s)"
@dataclass
class ValidationResult:
"""Result of script validation"""
valid: bool
errors: List[ErrorDetail] = field(default_factory=list)
warnings: List[WarningDetail] = field(default_factory=list)
def to_dict(self) -> dict:
return {
"valid": self.valid,
"errors": [e.to_dict() for e in self.errors],
"warnings": [w.to_dict() for w in self.warnings]
}
def to_json(self) -> str:
return json.dumps(self.to_dict(), indent=2)
@property
def first_error(self) -> Optional[ErrorDetail]:
return self.errors[0] if self.errors else None

View File

@@ -0,0 +1,623 @@
"""
2025-06-03
By Unclcode:
C4A-Script Language Documentation
Feeds Crawl4AI via CrawlerRunConfig(js_code=[ ... ]) no core modifications.
"""
from __future__ import annotations
import pathlib, re, sys, textwrap
from dataclasses import dataclass
from typing import Any, Dict, List, Union
from lark import Lark, Transformer, v_args
from lark.exceptions import UnexpectedToken, UnexpectedCharacters, VisitError
# --------------------------------------------------------------------------- #
# Custom Error Classes
# --------------------------------------------------------------------------- #
class C4AScriptError(Exception):
"""Custom error class for C4A-Script compilation errors"""
def __init__(self, message: str, line: int = None, column: int = None,
error_type: str = "Syntax Error", details: str = None):
self.message = message
self.line = line
self.column = column
self.error_type = error_type
self.details = details
super().__init__(self._format_message())
def _format_message(self) -> str:
"""Format a clear error message"""
lines = [f"\n{'='*60}"]
lines.append(f"C4A-Script {self.error_type}")
lines.append(f"{'='*60}")
if self.line:
lines.append(f"Location: Line {self.line}" + (f", Column {self.column}" if self.column else ""))
lines.append(f"Error: {self.message}")
if self.details:
lines.append(f"\nDetails: {self.details}")
lines.append("="*60)
return "\n".join(lines)
@classmethod
def from_exception(cls, exc: Exception, script: Union[str, List[str]]) -> 'C4AScriptError':
"""Create C4AScriptError from another exception"""
script_text = script if isinstance(script, str) else '\n'.join(script)
script_lines = script_text.split('\n')
if isinstance(exc, UnexpectedToken):
# Extract line and column from UnexpectedToken
line = exc.line
column = exc.column
# Get the problematic line
if 0 < line <= len(script_lines):
problem_line = script_lines[line - 1]
marker = " " * (column - 1) + "^"
details = f"\nCode:\n {problem_line}\n {marker}\n"
# Improve error message based on context
if exc.token.type == 'CLICK' and 'THEN' in str(exc.expected):
message = "Missing 'THEN' keyword after IF condition"
elif exc.token.type == '$END':
message = "Unexpected end of script. Check for missing ENDPROC or incomplete commands"
elif 'RPAR' in str(exc.expected):
message = "Missing closing parenthesis ')'"
elif 'COMMA' in str(exc.expected):
message = "Missing comma ',' in command"
else:
message = f"Unexpected '{exc.token}'"
if exc.expected:
expected_list = [str(e) for e in exc.expected if not e.startswith('_')]
if expected_list:
message += f". Expected: {', '.join(expected_list[:3])}"
details += f"Token: {exc.token.type} ('{exc.token.value}')"
else:
message = str(exc)
details = None
return cls(message, line, column, "Syntax Error", details)
elif isinstance(exc, UnexpectedCharacters):
# Extract line and column
line = exc.line
column = exc.column
if 0 < line <= len(script_lines):
problem_line = script_lines[line - 1]
marker = " " * (column - 1) + "^"
details = f"\nCode:\n {problem_line}\n {marker}\n"
message = f"Invalid character or unexpected text at position {column}"
else:
message = str(exc)
details = None
return cls(message, line, column, "Syntax Error", details)
elif isinstance(exc, ValueError):
# Handle runtime errors like undefined procedures
message = str(exc)
# Try to find which line caused the error
if "Unknown procedure" in message:
proc_name = re.search(r"'([^']+)'", message)
if proc_name:
proc_name = proc_name.group(1)
for i, line in enumerate(script_lines, 1):
if proc_name in line and not line.strip().startswith('PROC'):
details = f"\nCode:\n {line.strip()}\n\nMake sure the procedure '{proc_name}' is defined with PROC...ENDPROC"
return cls(f"Undefined procedure '{proc_name}'", i, None, "Runtime Error", details)
return cls(message, None, None, "Runtime Error", None)
else:
# Generic error
return cls(str(exc), None, None, "Compilation Error", None)
# --------------------------------------------------------------------------- #
# 1. Grammar
# --------------------------------------------------------------------------- #
GRAMMAR = r"""
start : line*
?line : command | proc_def | include | comment
command : wait | nav | click_cmd | double_click | right_click | move | drag | scroll
| type | press | key_down | key_up
| eval_cmd | set_var | proc_call | if_cmd | repeat_cmd
wait : "WAIT" (ESCAPED_STRING|BACKTICK_STRING|NUMBER) NUMBER? -> wait_cmd
nav : "GO" URL -> go
| "RELOAD" -> reload
| "BACK" -> back
| "FORWARD" -> forward
click_cmd : "CLICK" (BACKTICK_STRING|NUMBER NUMBER) -> click
double_click : "DOUBLE_CLICK" (BACKTICK_STRING|NUMBER NUMBER) -> double_click
right_click : "RIGHT_CLICK" (BACKTICK_STRING|NUMBER NUMBER) -> right_click
move : "MOVE" coords -> move
drag : "DRAG" coords coords -> drag
scroll : "SCROLL" DIR NUMBER? -> scroll
type : "TYPE" (ESCAPED_STRING | NAME) -> type
press : "PRESS" WORD -> press
key_down : "KEY_DOWN" WORD -> key_down
key_up : "KEY_UP" WORD -> key_up
eval_cmd : "EVAL" BACKTICK_STRING -> eval_cmd
set_var : "SET" NAME "=" value -> set_var
proc_call : NAME -> proc_call
proc_def : "PROC" NAME line* "ENDPROC" -> proc_def
include : "USE" ESCAPED_STRING -> include
comment : /#.*/ -> comment
if_cmd : "IF" "(" condition ")" "THEN" command ("ELSE" command)? -> if_cmd
repeat_cmd : "REPEAT" "(" command "," repeat_count ")" -> repeat_cmd
condition : exists_cond | js_cond
exists_cond : "EXISTS" BACKTICK_STRING -> exists_cond
js_cond : BACKTICK_STRING -> js_cond
repeat_count : NUMBER | BACKTICK_STRING
coords : NUMBER NUMBER
value : ESCAPED_STRING | NUMBER
DIR : /(UP|DOWN|LEFT|RIGHT)/i
REST : /[^\n]+/
URL : /(http|https):\/\/[^\s]+/
NAME : /\$?[A-Za-z_][A-Za-z0-9_]*/
WORD : /[A-Za-z0-9+]+/
BACKTICK_STRING : /`[^`]*`/
%import common.NUMBER
%import common.ESCAPED_STRING
%import common.WS_INLINE
%import common.NEWLINE
%ignore WS_INLINE
%ignore NEWLINE
"""
# --------------------------------------------------------------------------- #
# 2. IR dataclasses
# --------------------------------------------------------------------------- #
@dataclass
class Cmd:
op: str
args: List[Any]
@dataclass
class Proc:
name: str
body: List[Cmd]
# --------------------------------------------------------------------------- #
# 3. AST → IR
# --------------------------------------------------------------------------- #
@v_args(inline=True)
class ASTBuilder(Transformer):
# helpers
def _strip(self, s):
if s.startswith('"') and s.endswith('"'):
return s[1:-1]
elif s.startswith('`') and s.endswith('`'):
return s[1:-1]
return s
def start(self,*i): return list(i)
def line(self,i): return i
def command(self,i): return i
# WAIT
def wait_cmd(self, rest, timeout=None):
rest_str = str(rest)
# Check if it's a number (including floats)
try:
num_val = float(rest_str)
payload = (num_val, "seconds")
except ValueError:
if rest_str.startswith('"') and rest_str.endswith('"'):
payload = (self._strip(rest_str), "text")
elif rest_str.startswith('`') and rest_str.endswith('`'):
payload = (self._strip(rest_str), "selector")
else:
payload = (rest_str, "selector")
return Cmd("WAIT", [payload, int(timeout) if timeout else None])
# NAV
def go(self,u): return Cmd("GO",[str(u)])
def reload(self): return Cmd("RELOAD",[])
def back(self): return Cmd("BACK",[])
def forward(self): return Cmd("FORWARD",[])
# CLICK, DOUBLE_CLICK, RIGHT_CLICK
def click(self, *args):
return self._handle_click("CLICK", args)
def double_click(self, *args):
return self._handle_click("DBLCLICK", args)
def right_click(self, *args):
return self._handle_click("RIGHTCLICK", args)
def _handle_click(self, op, args):
if len(args) == 1:
# Single argument - backtick string
target = self._strip(str(args[0]))
return Cmd(op, [("selector", target)])
else:
# Two arguments - coordinates
x, y = args
return Cmd(op, [("coords", int(x), int(y))])
# MOVE / DRAG / SCROLL
def coords(self,x,y): return ("coords",int(x),int(y))
def move(self,c): return Cmd("MOVE",[c])
def drag(self,c1,c2): return Cmd("DRAG",[c1,c2])
def scroll(self,dir_tok,amt=None):
return Cmd("SCROLL",[dir_tok.upper(), int(amt) if amt else 500])
# KEYS
def type(self,tok): return Cmd("TYPE",[self._strip(str(tok))])
def press(self,w): return Cmd("PRESS",[str(w)])
def key_down(self,w): return Cmd("KEYDOWN",[str(w)])
def key_up(self,w): return Cmd("KEYUP",[str(w)])
# FLOW
def eval_cmd(self,txt): return Cmd("EVAL",[self._strip(str(txt))])
def set_var(self,n,v):
# v might be a Token or a Tree, extract value properly
if hasattr(v, 'value'):
value = v.value
elif hasattr(v, 'children') and len(v.children) > 0:
value = v.children[0].value
else:
value = str(v)
return Cmd("SET",[str(n), self._strip(value)])
def proc_call(self,n): return Cmd("CALL",[str(n)])
def proc_def(self,n,*body): return Proc(str(n),[b for b in body if isinstance(b,Cmd)])
def include(self,p): return Cmd("INCLUDE",[self._strip(p)])
def comment(self,*_): return Cmd("NOP",[])
# IF-THEN-ELSE and EXISTS
def if_cmd(self, condition, then_cmd, else_cmd=None):
return Cmd("IF", [condition, then_cmd, else_cmd])
def condition(self, cond):
return cond
def exists_cond(self, selector):
return ("EXISTS", self._strip(str(selector)))
def js_cond(self, expr):
return ("JS", self._strip(str(expr)))
# REPEAT
def repeat_cmd(self, cmd, count):
return Cmd("REPEAT", [cmd, count])
def repeat_count(self, value):
return str(value)
# --------------------------------------------------------------------------- #
# 4. Compiler
# --------------------------------------------------------------------------- #
class Compiler:
def __init__(self, root: pathlib.Path|None=None):
self.parser = Lark(GRAMMAR,start="start",parser="lalr")
self.root = pathlib.Path(root or ".").resolve()
self.vars: Dict[str,Any] = {}
self.procs: Dict[str,Proc]= {}
def compile(self, text: Union[str, List[str]]) -> List[str]:
# Handle list input by joining with newlines
if isinstance(text, list):
text = '\n'.join(text)
ir = self._parse_with_includes(text)
ir = self._collect_procs(ir)
ir = self._inline_calls(ir)
ir = self._apply_set_vars(ir)
return [self._emit_js(c) for c in ir if isinstance(c,Cmd) and c.op!="NOP"]
# passes
def _parse_with_includes(self,txt,seen=None):
seen=seen or set()
cmds=ASTBuilder().transform(self.parser.parse(txt))
out=[]
for c in cmds:
if isinstance(c,Cmd) and c.op=="INCLUDE":
p=(self.root/c.args[0]).resolve()
if p in seen: raise ValueError(f"Circular include {p}")
seen.add(p); out+=self._parse_with_includes(p.read_text(),seen)
else: out.append(c)
return out
def _collect_procs(self,ir):
out=[]
for i in ir:
if isinstance(i,Proc): self.procs[i.name]=i
else: out.append(i)
return out
def _inline_calls(self,ir):
out=[]
for c in ir:
if isinstance(c,Cmd) and c.op=="CALL":
if c.args[0] not in self.procs:
raise ValueError(f"Unknown procedure {c.args[0]!r}")
out+=self._inline_calls(self.procs[c.args[0]].body)
else: out.append(c)
return out
def _apply_set_vars(self,ir):
def sub(s): return re.sub(r"\$(\w+)",lambda m:str(self.vars.get(m.group(1),m.group(0))) ,s) if isinstance(s,str) else s
out=[]
for c in ir:
if isinstance(c,Cmd):
if c.op=="SET": self.vars[c.args[0].lstrip('$')]=c.args[1]
else:
if c.op in("TYPE","EVAL"): c.args=[sub(a) for a in c.args]
out.append(c)
return out
# JS emitter
def _emit_js(self, cmd: Cmd) -> str:
op, a = cmd.op, cmd.args
if op == "GO": return f"location.href = '{a[0]}';"
if op == "RELOAD": return "location.reload();"
if op == "BACK": return "history.back();"
if op == "FORWARD": return "history.forward();"
if op == "WAIT":
arg, kind = a[0]
timeout = a[1] or 10
if kind == "seconds":
return f"await new Promise(r=>setTimeout(r,{arg}*1000));"
if kind == "selector":
sel = arg.replace("\\","\\\\").replace("'","\\'")
return textwrap.dedent(f"""
await new Promise((res,rej)=>{{
const max = {timeout*1000}, t0 = performance.now();
const id = setInterval(()=>{{
if(document.querySelector('{sel}')){{clearInterval(id);res();}}
else if(performance.now()-t0>max){{clearInterval(id);rej('WAIT selector timeout');}}
}},100);
}});
""").strip()
if kind == "text":
txt = arg.replace('`', '\\`')
return textwrap.dedent(f"""
await new Promise((res,rej)=>{{
const max={timeout*1000},t0=performance.now();
const id=setInterval(()=>{{
if(document.body.innerText.includes(`{txt}`)){{clearInterval(id);res();}}
else if(performance.now()-t0>max){{clearInterval(id);rej('WAIT text timeout');}}
}},100);
}});
""").strip()
# click-style helpers
def _js_click(sel, evt="click", button=0, detail=1):
sel = sel.replace("'", "\\'")
return f"document.querySelector('{sel}')?.dispatchEvent(new MouseEvent('{evt}',{{bubbles:true,button:{button},detail:{detail}}}));"
def _js_click_xy(x, y, evt="click", button=0, detail=1):
return textwrap.dedent(f"""
(()=>{{
const el=document.elementFromPoint({x},{y});
el&&el.dispatchEvent(new MouseEvent('{evt}',{{bubbles:true,button:{button},detail:{detail}}}));
}})();
""").strip()
if op in ("CLICK", "DBLCLICK", "RIGHTCLICK"):
evt = {"CLICK":"click","DBLCLICK":"dblclick","RIGHTCLICK":"contextmenu"}[op]
btn = 2 if op=="RIGHTCLICK" else 0
det = 2 if op=="DBLCLICK" else 1
kind,*rest = a[0]
return _js_click_xy(*rest) if kind=="coords" else _js_click(rest[0],evt,btn,det)
if op == "MOVE":
_, x, y = a[0]
return textwrap.dedent(f"""
document.dispatchEvent(new MouseEvent('mousemove',{{clientX:{x},clientY:{y},bubbles:true}}));
""").strip()
if op == "DRAG":
(_, x1, y1), (_, x2, y2) = a
return textwrap.dedent(f"""
(()=>{{
const s=document.elementFromPoint({x1},{y1});
if(!s) return;
s.dispatchEvent(new MouseEvent('mousedown',{{bubbles:true,clientX:{x1},clientY:{y1}}}));
document.dispatchEvent(new MouseEvent('mousemove',{{bubbles:true,clientX:{x2},clientY:{y2}}}));
document.dispatchEvent(new MouseEvent('mouseup', {{bubbles:true,clientX:{x2},clientY:{y2}}}));
}})();
""").strip()
if op == "SCROLL":
dir_, amt = a
dx, dy = {"UP":(0,-amt),"DOWN":(0,amt),"LEFT":(-amt,0),"RIGHT":(amt,0)}[dir_]
return f"window.scrollBy({dx},{dy});"
if op == "TYPE":
txt = a[0].replace("'", "\\'")
return textwrap.dedent(f"""
(()=>{{
const el=document.activeElement;
if(el){{
el.value += '{txt}';
el.dispatchEvent(new Event('input',{{bubbles:true}}));
}}
}})();
""").strip()
if op in ("PRESS","KEYDOWN","KEYUP"):
key = a[0]
evs = {"PRESS":("keydown","keyup"),"KEYDOWN":("keydown",),"KEYUP":("keyup",)}[op]
return ";".join([f"document.dispatchEvent(new KeyboardEvent('{e}',{{key:'{key}',bubbles:true}}))" for e in evs]) + ";"
if op == "EVAL":
return a[0]
if op == "IF":
condition, then_cmd, else_cmd = a
cond_type, cond_value = condition
# Generate condition JavaScript
if cond_type == "EXISTS":
js_condition = f"!!document.querySelector('{cond_value}')"
else: # JS condition
js_condition = cond_value
# Generate commands - handle both regular commands and procedure calls
then_js = self._handle_cmd_or_proc(then_cmd)
else_js = self._handle_cmd_or_proc(else_cmd) if else_cmd else ""
if else_cmd:
return textwrap.dedent(f"""
if ({js_condition}) {{
{then_js}
}} else {{
{else_js}
}}
""").strip()
else:
return textwrap.dedent(f"""
if ({js_condition}) {{
{then_js}
}}
""").strip()
if op == "REPEAT":
cmd, count = a
# Handle the count - could be number or JS expression
if count.isdigit():
# Simple number
repeat_js = self._handle_cmd_or_proc(cmd)
return textwrap.dedent(f"""
for (let _i = 0; _i < {count}; _i++) {{
{repeat_js}
}}
""").strip()
else:
# JS expression (from backticks)
count_expr = count[1:-1] if count.startswith('`') and count.endswith('`') else count
repeat_js = self._handle_cmd_or_proc(cmd)
return textwrap.dedent(f"""
(()=>{{
const _count = {count_expr};
if (typeof _count === 'number') {{
for (let _i = 0; _i < _count; _i++) {{
{repeat_js}
}}
}} else if (_count) {{
{repeat_js}
}}
}})();
""").strip()
raise ValueError(f"Unhandled op {op}")
def _handle_cmd_or_proc(self, cmd):
"""Handle a command that might be a regular command or a procedure call"""
if not cmd:
return ""
if isinstance(cmd, Cmd):
if cmd.op == "CALL":
# Inline the procedure
if cmd.args[0] not in self.procs:
raise ValueError(f"Unknown procedure {cmd.args[0]!r}")
proc_body = self.procs[cmd.args[0]].body
return "\n".join([self._emit_js(c) for c in proc_body if c.op != "NOP"])
else:
return self._emit_js(cmd)
return ""
# --------------------------------------------------------------------------- #
# 5. Helpers + demo
# --------------------------------------------------------------------------- #
def compile_string(script: Union[str, List[str]], *, root: Union[pathlib.Path, None] = None) -> List[str]:
"""Compile C4A-Script from string or list of strings to JavaScript.
Args:
script: C4A-Script as a string or list of command strings
root: Root directory for resolving includes (optional)
Returns:
List of JavaScript command strings
Raises:
C4AScriptError: When compilation fails with detailed error information
"""
try:
return Compiler(root).compile(script)
except Exception as e:
# Wrap the error with better formatting
raise C4AScriptError.from_exception(e, script)
def compile_file(path: pathlib.Path) -> List[str]:
"""Compile C4A-Script from file to JavaScript.
Args:
path: Path to C4A-Script file
Returns:
List of JavaScript command strings
"""
return compile_string(path.read_text(), root=path.parent)
def compile_lines(lines: List[str], *, root: Union[pathlib.Path, None] = None) -> List[str]:
"""Compile C4A-Script from list of lines to JavaScript.
Args:
lines: List of C4A-Script command lines
root: Root directory for resolving includes (optional)
Returns:
List of JavaScript command strings
"""
return compile_string(lines, root=root)
DEMO = """
# quick sanity demo
PROC login
CLICK `input[name="username"]`
TYPE $user
PRESS Tab
TYPE $pass
CLICK `button.submit`
ENDPROC
SET user = "tom@crawl4ai.com"
SET pass = "hunter2"
GO https://example.com/login
WAIT `input[name="username"]` 10
login
WAIT 3
EVAL `console.log('logged in')`
"""
if __name__ == "__main__":
if len(sys.argv) == 2:
for js in compile_file(pathlib.Path(sys.argv[1])):
print(js)
else:
print("=== DEMO ===")
for js in compile_string(DEMO):
print(js)

View File

@@ -0,0 +1,312 @@
# C4A-Script Language Documentation
C4A-Script (Crawl4AI Script) is a simple, powerful language for web automation. Write human-readable commands that compile to JavaScript for browser automation.
## Quick Start
```python
from c4a_compile import compile
# Write your script
script = """
GO https://example.com
WAIT `#content` 5
CLICK `button.submit`
"""
# Compile to JavaScript
result = compile(script)
if result.success:
# Use with Crawl4AI
config = CrawlerRunConfig(js_code=result.js_code)
else:
print(f"Error at line {result.first_error.line}: {result.first_error.message}")
```
## Language Basics
- **One command per line**
- **Selectors in backticks**: `` `button.submit` ``
- **Strings in quotes**: `"Hello World"`
- **Variables with $**: `$username`
- **Comments with #**: `# This is a comment`
## Commands Reference
### Navigation
```c4a
GO https://example.com # Navigate to URL
RELOAD # Reload current page
BACK # Go back in history
FORWARD # Go forward in history
```
### Waiting
```c4a
WAIT 3 # Wait 3 seconds
WAIT `#content` 10 # Wait for element (max 10 seconds)
WAIT "Loading complete" 5 # Wait for text to appear
```
### Mouse Actions
```c4a
CLICK `button.submit` # Click element
DOUBLE_CLICK `.item` # Double-click element
RIGHT_CLICK `#menu` # Right-click element
CLICK 100 200 # Click at coordinates
MOVE 500 300 # Move mouse to position
DRAG 100 100 500 300 # Drag from one point to another
SCROLL DOWN 500 # Scroll down 500 pixels
SCROLL UP # Scroll up (default 500px)
SCROLL LEFT 200 # Scroll left 200 pixels
SCROLL RIGHT # Scroll right
```
### Keyboard
```c4a
TYPE "hello@example.com" # Type text
TYPE $email # Type variable value
PRESS Tab # Press and release key
PRESS Enter
PRESS Escape
KEY_DOWN Shift # Hold key down
KEY_UP Shift # Release key
```
### Control Flow
#### IF-THEN-ELSE
```c4a
# Check if element exists
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept`
IF (EXISTS `#user`) THEN CLICK `.logout` ELSE CLICK `.login`
# JavaScript conditions
IF (`window.innerWidth < 768`) THEN CLICK `.mobile-menu`
IF (`document.querySelectorAll('.item').length > 10`) THEN SCROLL DOWN
```
#### REPEAT
```c4a
# Repeat fixed number of times
REPEAT (CLICK `.next`, 5)
# Repeat based on JavaScript expression
REPEAT (SCROLL DOWN 300, `document.querySelectorAll('.item').length`)
# Repeat while condition is true (like while loop)
REPEAT (CLICK `.load-more`, `document.querySelector('.load-more') !== null`)
```
### Variables & JavaScript
```c4a
# Set variables
SET username = "john@example.com"
SET count = "10"
# Use variables
TYPE $username
# Execute JavaScript
EVAL `console.log('Hello')`
EVAL `localStorage.setItem('key', 'value')`
```
### Procedures
```c4a
# Define reusable procedure
PROC login
CLICK `#email`
TYPE $email
CLICK `#password`
TYPE $password
CLICK `button[type="submit"]`
ENDPROC
# Use procedure
SET email = "user@example.com"
SET password = "secure123"
login
# Procedures work with control flow
IF (EXISTS `.login-form`) THEN login
REPEAT (process_item, 10)
```
## API Reference
### Functions
```python
from c4a_compile import compile, validate, compile_file
# Compile script
result = compile("GO https://example.com")
# Validate syntax only
result = validate(script)
# Compile from file
result = compile_file("script.c4a")
```
### Working with Results
```python
result = compile(script)
if result.success:
# Access generated JavaScript
js_code = result.js_code # List[str]
# Use with Crawl4AI
config = CrawlerRunConfig(js_code=js_code)
else:
# Handle errors
error = result.first_error
print(f"Line {error.line}, Column {error.column}: {error.message}")
# Get suggestions
for suggestion in error.suggestions:
print(f"Fix: {suggestion.message}")
# Get JSON for UI integration
error_json = result.to_json()
```
## Examples
### Basic Automation
```c4a
GO https://example.com
WAIT `#content` 5
IF (EXISTS `.cookie-notice`) THEN CLICK `.accept`
CLICK `.main-button`
```
### Form Filling
```c4a
SET email = "user@example.com"
SET message = "Hello, I need help with my order"
GO https://example.com/contact
WAIT `form` 5
CLICK `input[name="email"]`
TYPE $email
CLICK `textarea[name="message"]`
TYPE $message
CLICK `button[type="submit"]`
WAIT "Thank you" 10
```
### Dynamic Content Loading
```c4a
GO https://shop.example.com
WAIT `.product-list` 10
# Load all products
REPEAT (CLICK `.load-more`, `document.querySelector('.load-more') !== null`)
# Extract data
EVAL `
const count = document.querySelectorAll('.product').length;
console.log('Found ' + count + ' products');
`
```
### Smart Navigation
```c4a
PROC handle_popups
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept-all`
IF (EXISTS `.newsletter-modal`) THEN CLICK `.close`
ENDPROC
GO https://example.com
handle_popups
WAIT `.main-content` 5
# Navigate based on login state
IF (EXISTS `.user-avatar`) THEN CLICK `.dashboard` ELSE CLICK `.login`
```
## Error Messages
C4A-Script provides clear, helpful error messages:
```
============================================================
Syntax Error [E001]
============================================================
Location: Line 3, Column 23
Error: Missing 'THEN' keyword after IF condition
Code:
3 | IF (EXISTS `.button`) CLICK `.button`
| ^
Suggestions:
1. Add 'THEN' after the condition
============================================================
```
Common error codes:
- **E001**: Missing 'THEN' keyword
- **E002**: Missing closing parenthesis
- **E003**: Missing comma in REPEAT
- **E004**: Missing ENDPROC
- **E005**: Undefined procedure
- **E006**: Missing backticks for selector
## Best Practices
1. **Always use backticks for selectors**: `` CLICK `button` `` not `CLICK button`
2. **Check element existence before interaction**: `IF (EXISTS `.modal`) THEN CLICK `.close`
3. **Set appropriate wait times**: Don't wait too long or too short
4. **Use procedures for repeated actions**: Keep your code DRY
5. **Add comments for clarity**: `# Check if user is logged in`
## Integration with Crawl4AI
```python
from c4a_compile import compile
from crawl4ai import CrawlerRunConfig, WebCrawler
# Compile your script
script = """
GO https://example.com
WAIT `.content` 5
CLICK `.load-more`
"""
result = compile(script)
if result.success:
# Create crawler config with compiled JS
config = CrawlerRunConfig(
js_code=result.js_code,
wait_for="css:.results"
)
# Run crawler
async with WebCrawler() as crawler:
result = await crawler.arun(config=config)
```
That's it! You're ready to automate the web with C4A-Script.

View File

@@ -0,0 +1,217 @@
"""
C4A-Script API Usage Examples
Shows how to use the new Result-based API in various scenarios
"""
from c4a_compile import compile, validate, compile_file
from c4a_result import CompilationResult, ValidationResult
import json
print("C4A-Script API Usage Examples")
print("=" * 80)
# Example 1: Basic compilation
print("\n1. Basic Compilation")
print("-" * 40)
script = """
GO https://example.com
WAIT 2
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept`
REPEAT (SCROLL DOWN 300, 3)
"""
result = compile(script)
print(f"Success: {result.success}")
print(f"Statements generated: {len(result.js_code) if result.js_code else 0}")
# Example 2: Error handling
print("\n\n2. Error Handling")
print("-" * 40)
error_script = """
GO https://example.com
IF (EXISTS `.modal`) CLICK `.close`
undefined_procedure
"""
result = compile(error_script)
if not result.success:
# Access error details
error = result.first_error
print(f"Error on line {error.line}: {error.message}")
print(f"Error code: {error.code}")
# Show suggestions if available
if error.suggestions:
print("Suggestions:")
for suggestion in error.suggestions:
print(f" - {suggestion.message}")
# Example 3: Validation only
print("\n\n3. Validation (no code generation)")
print("-" * 40)
validation_script = """
PROC validate_form
IF (EXISTS `#email`) THEN TYPE "test@example.com"
PRESS Tab
ENDPROC
validate_form
"""
validation = validate(validation_script)
print(f"Valid: {validation.valid}")
if validation.errors:
print(f"Errors found: {len(validation.errors)}")
# Example 4: JSON output for UI
print("\n\n4. JSON Output for UI Integration")
print("-" * 40)
ui_script = """
CLICK button.submit
"""
result = compile(ui_script)
if not result.success:
# Get JSON for UI
error_json = result.to_dict()
print("Error data for UI:")
print(json.dumps(error_json["errors"][0], indent=2))
# Example 5: File compilation
print("\n\n5. File Compilation")
print("-" * 40)
# Create a test file
test_file = "test_script.c4a"
with open(test_file, "w") as f:
f.write("""
GO https://example.com
WAIT `.content` 5
CLICK `.main-button`
""")
result = compile_file(test_file)
print(f"File compilation: {'Success' if result.success else 'Failed'}")
if result.success:
print(f"Generated {len(result.js_code)} JavaScript statements")
# Clean up
import os
os.remove(test_file)
# Example 6: Batch processing
print("\n\n6. Batch Processing Multiple Scripts")
print("-" * 40)
scripts = [
"GO https://example1.com\nCLICK `.button`",
"GO https://example2.com\nWAIT 2",
"GO https://example3.com\nINVALID_CMD"
]
results = []
for i, script in enumerate(scripts, 1):
result = compile(script)
results.append(result)
status = "" if result.success else ""
print(f"Script {i}: {status}")
# Summary
successful = sum(1 for r in results if r.success)
print(f"\nBatch result: {successful}/{len(scripts)} successful")
# Example 7: Custom error formatting
print("\n\n7. Custom Error Formatting")
print("-" * 40)
def format_error_for_ide(error):
"""Format error for IDE integration"""
return f"{error.source_line}:{error.line}:{error.column}: {error.type.value}: {error.message} [{error.code}]"
error_script = "IF EXISTS `.button` THEN CLICK `.button`"
result = compile(error_script)
if not result.success:
error = result.first_error
print("IDE format:", format_error_for_ide(error))
print("Simple format:", error.simple_message)
print("Full format:", error.formatted_message)
# Example 8: Working with warnings (future feature)
print("\n\n8. Handling Warnings")
print("-" * 40)
# In the future, we might have warnings
result = compile("GO https://example.com\nWAIT 100") # Very long wait
print(f"Success: {result.success}")
print(f"Warnings: {len(result.warnings)}")
# Example 9: Metadata usage
print("\n\n9. Using Metadata")
print("-" * 40)
complex_script = """
PROC helper1
CLICK `.btn1`
ENDPROC
PROC helper2
CLICK `.btn2`
ENDPROC
GO https://example.com
helper1
helper2
"""
result = compile(complex_script)
if result.success:
print(f"Script metadata:")
for key, value in result.metadata.items():
print(f" {key}: {value}")
# Example 10: Integration patterns
print("\n\n10. Integration Patterns")
print("-" * 40)
# Web API endpoint simulation
def api_compile(request_body):
"""Simulate API endpoint"""
script = request_body.get("script", "")
result = compile(script)
response = {
"status": "success" if result.success else "error",
"data": result.to_dict()
}
return response
# CLI tool simulation
def cli_compile(script, output_format="text"):
"""Simulate CLI tool"""
result = compile(script)
if output_format == "json":
return result.to_json()
elif output_format == "simple":
if result.success:
return f"OK: {len(result.js_code)} statements"
else:
return f"ERROR: {result.first_error.simple_message}"
else:
return str(result)
# Test the patterns
api_response = api_compile({"script": "GO https://example.com"})
print(f"API response status: {api_response['status']}")
cli_output = cli_compile("WAIT 2", "simple")
print(f"CLI output: {cli_output}")
print("\n" + "=" * 80)
print("All examples completed successfully!")

View File

@@ -0,0 +1,53 @@
"""
C4A-Script Hello World
A concise example showing how to use the C4A-Script compiler
"""
from c4a_compile import compile
# Define your C4A-Script
script = """
GO https://example.com
WAIT `#content` 5
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept`
CLICK `button.submit`
"""
# Compile the script
result = compile(script)
# Check if compilation was successful
if result.success:
# Success! Use the generated JavaScript
print("✅ Compilation successful!")
print(f"Generated {len(result.js_code)} JavaScript statements:\n")
for i, js in enumerate(result.js_code, 1):
print(f"{i}. {js}\n")
# In real usage, you'd pass result.js_code to Crawl4AI:
# config = CrawlerRunConfig(js_code=result.js_code)
else:
# Error! Handle the compilation error
print("❌ Compilation failed!")
# Get the first error (there might be multiple)
error = result.first_error
# Show error details
print(f"Error at line {error.line}, column {error.column}")
print(f"Message: {error.message}")
# Show the problematic code
print(f"\nCode: {error.source_line}")
print(" " * (6 + error.column) + "^")
# Show suggestions if available
if error.suggestions:
print("\n💡 How to fix:")
for suggestion in error.suggestions:
print(f" {suggestion.message}")
# For debugging or logging, you can also get JSON
# error_json = result.to_json()

View File

@@ -0,0 +1,53 @@
"""
C4A-Script Hello World - Error Example
Shows how error handling works
"""
from c4a_compile import compile
# Define a script with an error (missing THEN)
script = """
GO https://example.com
WAIT `#content` 5
IF (EXISTS `.cookie-banner`) CLICK `.accept`
CLICK `button.submit`
"""
# Compile the script
result = compile(script)
# Check if compilation was successful
if result.success:
# Success! Use the generated JavaScript
print("✅ Compilation successful!")
print(f"Generated {len(result.js_code)} JavaScript statements:\n")
for i, js in enumerate(result.js_code, 1):
print(f"{i}. {js}\n")
# In real usage, you'd pass result.js_code to Crawl4AI:
# config = CrawlerRunConfig(js_code=result.js_code)
else:
# Error! Handle the compilation error
print("❌ Compilation failed!")
# Get the first error (there might be multiple)
error = result.first_error
# Show error details
print(f"Error at line {error.line}, column {error.column}")
print(f"Message: {error.message}")
# Show the problematic code
print(f"\nCode: {error.source_line}")
print(" " * (6 + error.column) + "^")
# Show suggestions if available
if error.suggestions:
print("\n💡 How to fix:")
for suggestion in error.suggestions:
print(f" {suggestion.message}")
# For debugging or logging, you can also get JSON
# error_json = result.to_json()

View File

@@ -0,0 +1,285 @@
"""
Demonstration of C4A-Script integration with Crawl4AI
Shows various use cases and features
"""
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai import c4a_compile, CompilationResult
async def example_basic_usage():
"""Basic C4A-Script usage with Crawl4AI"""
print("\n" + "="*60)
print("Example 1: Basic C4A-Script Usage")
print("="*60)
# Define your automation script
c4a_script = """
# Wait for page to load
WAIT `body` 2
# Handle cookie banner if present
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept-btn`
# Scroll down to load more content
SCROLL DOWN 500
WAIT 1
# Click load more button if exists
IF (EXISTS `.load-more`) THEN CLICK `.load-more`
"""
# Create crawler config with C4A script
config = CrawlerRunConfig(
url="https://example.com",
c4a_script=c4a_script,
wait_for="css:.content",
verbose=False
)
print("✅ C4A Script compiled successfully!")
print(f"Generated {len(config.js_code)} JavaScript commands")
# In production, you would run:
# async with AsyncWebCrawler() as crawler:
# result = await crawler.arun(config=config)
async def example_form_filling():
"""Form filling with C4A-Script"""
print("\n" + "="*60)
print("Example 2: Form Filling with C4A-Script")
print("="*60)
# Form automation script
form_script = """
# Set form values
SET email = "test@example.com"
SET message = "This is a test message"
# Fill the form
CLICK `#email-input`
TYPE $email
CLICK `#message-textarea`
TYPE $message
# Submit the form
CLICK `button[type="submit"]`
# Wait for success message
WAIT `.success-message` 10
"""
config = CrawlerRunConfig(
url="https://example.com/contact",
c4a_script=form_script
)
print("✅ Form filling script ready")
print("Script will:")
print(" - Fill email field")
print(" - Fill message textarea")
print(" - Submit form")
print(" - Wait for confirmation")
async def example_dynamic_loading():
"""Handle dynamic content loading"""
print("\n" + "="*60)
print("Example 3: Dynamic Content Loading")
print("="*60)
# Script for infinite scroll or pagination
pagination_script = """
# Initial wait
WAIT `.product-list` 5
# Load all products by clicking "Load More" repeatedly
REPEAT (CLICK `.load-more`, `document.querySelector('.load-more') !== null`)
# Alternative: Scroll to load (infinite scroll)
# REPEAT (SCROLL DOWN 1000, `document.querySelectorAll('.product').length < 100`)
# Extract count
EVAL `console.log('Products loaded: ' + document.querySelectorAll('.product').length)`
"""
config = CrawlerRunConfig(
url="https://example.com/products",
c4a_script=pagination_script,
screenshot=True # Capture final state
)
print("✅ Dynamic loading script ready")
print("Script will load all products by repeatedly clicking 'Load More'")
async def example_multi_step_workflow():
"""Complex multi-step workflow with procedures"""
print("\n" + "="*60)
print("Example 4: Multi-Step Workflow with Procedures")
print("="*60)
# Complex workflow with reusable procedures
workflow_script = """
# Define login procedure
PROC login
CLICK `#username`
TYPE "demo_user"
CLICK `#password`
TYPE "demo_pass"
CLICK `#login-btn`
WAIT `.dashboard` 10
ENDPROC
# Define search procedure
PROC search_product
CLICK `.search-box`
TYPE "laptop"
PRESS Enter
WAIT `.search-results` 5
ENDPROC
# Main workflow
GO https://example.com
login
search_product
# Process results
IF (EXISTS `.no-results`) THEN EVAL `console.log('No products found')`
ELSE REPEAT (CLICK `.add-to-cart`, 3)
"""
# Compile to check for errors
result = c4a_compile(workflow_script)
if result.success:
print("✅ Complex workflow compiled successfully!")
print("Workflow includes:")
print(" - Login procedure")
print(" - Product search")
print(" - Conditional cart additions")
config = CrawlerRunConfig(
url="https://example.com",
c4a_script=workflow_script
)
else:
print("❌ Compilation error:")
error = result.first_error
print(f" Line {error.line}: {error.message}")
async def example_error_handling():
"""Demonstrate error handling"""
print("\n" + "="*60)
print("Example 5: Error Handling")
print("="*60)
# Script with intentional error
bad_script = """
WAIT body 2
CLICK button
IF (EXISTS .modal) CLICK .close
"""
try:
config = CrawlerRunConfig(
url="https://example.com",
c4a_script=bad_script
)
except ValueError as e:
print("✅ Error caught as expected:")
print(f" {e}")
# Fixed version
good_script = """
WAIT `body` 2
CLICK `button`
IF (EXISTS `.modal`) THEN CLICK `.close`
"""
config = CrawlerRunConfig(
url="https://example.com",
c4a_script=good_script
)
print("\n✅ Fixed script compiled successfully!")
async def example_combining_with_extraction():
"""Combine C4A-Script with extraction strategies"""
print("\n" + "="*60)
print("Example 6: C4A-Script + Extraction Strategies")
print("="*60)
from crawl4ai import JsonCssExtractionStrategy
# Script to prepare page for extraction
prep_script = """
# Expand all collapsed sections
REPEAT (CLICK `.expand-btn`, `document.querySelectorAll('.expand-btn:not(.expanded)').length > 0`)
# Load all comments
IF (EXISTS `.load-comments`) THEN CLICK `.load-comments`
WAIT `.comments-section` 5
# Close any popups
IF (EXISTS `.popup-close`) THEN CLICK `.popup-close`
"""
# Define extraction schema
schema = {
"name": "article",
"selector": "article.main",
"fields": {
"title": {"selector": "h1", "type": "text"},
"content": {"selector": ".content", "type": "text"},
"comments": {
"selector": ".comment",
"type": "list",
"fields": {
"author": {"selector": ".author", "type": "text"},
"text": {"selector": ".text", "type": "text"}
}
}
}
}
config = CrawlerRunConfig(
url="https://example.com/article",
c4a_script=prep_script,
extraction_strategy=JsonCssExtractionStrategy(schema),
wait_for="css:.comments-section"
)
print("✅ Combined C4A + Extraction ready")
print("Workflow:")
print(" 1. Expand collapsed sections")
print(" 2. Load comments")
print(" 3. Extract structured data")
async def main():
"""Run all examples"""
print("\n🚀 C4A-Script + Crawl4AI Integration Demo\n")
# Run all examples
await example_basic_usage()
await example_form_filling()
await example_dynamic_loading()
await example_multi_step_workflow()
await example_error_handling()
await example_combining_with_extraction()
print("\n" + "="*60)
print("✅ All examples completed successfully!")
print("="*60)
print("\nTo run actual crawls, uncomment the AsyncWebCrawler sections")
print("or create your own scripts using these examples as templates.")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,7 @@
GO https://store.example.com/product/laptop
WAIT `.product-details` 8
CLICK `button.add-to-cart`
WAIT `.cart-notification` 3
CLICK `.cart-icon`
WAIT `.checkout-btn` 5
CLICK `.checkout-btn`

View File

@@ -0,0 +1,43 @@
# Advanced control flow with IF, EXISTS, and REPEAT
# Define reusable procedures
PROC handle_cookie_banner
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept-cookies`
IF (EXISTS `.privacy-notice`) THEN CLICK `.dismiss-privacy`
ENDPROC
PROC scroll_to_load
SCROLL DOWN 500
WAIT 0.5
ENDPROC
PROC try_login
CLICK `#email`
TYPE "user@example.com"
CLICK `#password`
TYPE "secure123"
CLICK `button[type="submit"]`
WAIT 2
ENDPROC
# Main script
GO https://example.com
WAIT 2
# Handle popups
handle_cookie_banner
# Conditional navigation based on login state
IF (EXISTS `.user-menu`) THEN CLICK `.dashboard-link` ELSE try_login
# Repeat scrolling based on content count
REPEAT (scroll_to_load, 5)
# Load more content while button exists
REPEAT (CLICK `.load-more`, `document.querySelector('.load-more') && !document.querySelector('.no-more-content')`)
# Process items conditionally
IF (`document.querySelectorAll('.item').length > 10`) THEN EVAL `console.log('Found ' + document.querySelectorAll('.item').length + ' items')`
# Complex condition with viewport check
IF (`window.innerWidth < 768 && document.querySelector('.mobile-menu')`) THEN CLICK `.mobile-menu-toggle`

View File

@@ -0,0 +1,8 @@
GO https://myapp.com
WAIT 2
IF (EXISTS `.user-avatar`) THEN CLICK `.logout` ELSE CLICK `.login`
WAIT `#auth-form` 5
IF (EXISTS `#auth-form`) THEN TYPE "user@example.com"
IF (EXISTS `#auth-form`) THEN PRESS Tab
IF (EXISTS `#auth-form`) THEN TYPE "password123"
IF (EXISTS `#auth-form`) THEN CLICK `button[type="submit"]`

View File

@@ -0,0 +1,56 @@
# Data extraction example
# Scrapes product information from an e-commerce site
# Navigate to products page
GO https://shop.example.com/products
WAIT `.product-list` 10
# Scroll to load lazy-loaded content
SCROLL DOWN 500
WAIT 1
SCROLL DOWN 500
WAIT 1
SCROLL DOWN 500
WAIT 2
# Extract product data
EVAL `
// Extract all product information
const products = Array.from(document.querySelectorAll('.product-card')).map((card, index) => {
return {
id: index + 1,
name: card.querySelector('.product-title')?.textContent?.trim() || 'N/A',
price: card.querySelector('.price')?.textContent?.trim() || 'N/A',
rating: card.querySelector('.rating')?.textContent?.trim() || 'N/A',
availability: card.querySelector('.in-stock') ? 'In Stock' : 'Out of Stock',
image: card.querySelector('img')?.src || 'N/A'
};
});
// Log results
console.log('=== Product Extraction Results ===');
console.log('Total products found:', products.length);
console.log(JSON.stringify(products, null, 2));
// Save to localStorage for retrieval
localStorage.setItem('scraped_products', JSON.stringify(products));
`
# Optional: Click on first product for details
CLICK `.product-card:first-child`
WAIT `.product-details` 5
# Extract detailed information
EVAL `
const details = {
description: document.querySelector('.product-description')?.textContent?.trim(),
specifications: Array.from(document.querySelectorAll('.spec-item')).map(spec => ({
label: spec.querySelector('.spec-label')?.textContent,
value: spec.querySelector('.spec-value')?.textContent
})),
reviews: document.querySelector('.review-count')?.textContent
};
console.log('=== Product Details ===');
console.log(JSON.stringify(details, null, 2));
`

View File

@@ -0,0 +1,8 @@
GO https://company.com/contact
WAIT `form#contact` 10
TYPE "John Smith"
PRESS Tab
TYPE "john@email.com"
PRESS Tab
TYPE "Need help with my order"
CLICK `button[type="submit"]`

View File

@@ -0,0 +1,7 @@
GO https://news.example.com
WAIT `.article-list` 5
REPEAT (SCROLL DOWN 500, 3)
WAIT 1
REPEAT (CLICK `.load-more`, `document.querySelector('.load-more') !== null`)
WAIT 2
IF (`document.querySelectorAll('.article').length > 20`) THEN EVAL `console.log('Loaded enough articles')`

View File

@@ -0,0 +1,36 @@
# Login flow with error handling
# Demonstrates procedures, variables, and conditional checks
# Define login procedure
PROC perform_login
CLICK `input#email`
TYPE $email
CLICK `input#password`
TYPE $password
CLICK `button.login-submit`
ENDPROC
# Set credentials
SET email = "user@example.com"
SET password = "securePassword123"
# Navigate to login page
GO https://app.example.com/login
WAIT `.login-container` 15
# Attempt login
perform_login
# Wait for page to load
WAIT 3
# Check if login was successful
EVAL `
if (document.querySelector('.dashboard')) {
console.log('Login successful - on dashboard');
} else if (document.querySelector('.error-message')) {
console.log('Login failed:', document.querySelector('.error-message').textContent);
} else {
console.log('Unknown state after login');
}
`

View File

@@ -0,0 +1,106 @@
# Multi-step e-commerce workflow
# Complete purchase flow with procedures and error handling
# Reusable procedures
PROC search_product
CLICK `input.search-bar`
TYPE $search_term
PRESS Enter
WAIT `.search-results` 10
ENDPROC
PROC add_first_item_to_cart
CLICK `.product-item:first-child .add-to-cart`
WAIT ".added-to-cart-notification" 3
ENDPROC
PROC go_to_checkout
CLICK `.cart-icon`
WAIT `.cart-drawer` 5
CLICK `button.proceed-to-checkout`
WAIT `.checkout-page` 10
ENDPROC
PROC fill_customer_info
# Billing information
CLICK `#billing-firstname`
TYPE $first_name
CLICK `#billing-lastname`
TYPE $last_name
CLICK `#billing-email`
TYPE $email
CLICK `#billing-phone`
TYPE $phone
# Address
CLICK `#billing-address`
TYPE $address
CLICK `#billing-city`
TYPE $city
CLICK `#billing-state`
TYPE $state
CLICK `#billing-zip`
TYPE $zip
ENDPROC
PROC select_shipping
CLICK `input[value="standard"]`
WAIT 1
ENDPROC
# Set all required variables
SET search_term = "wireless headphones"
SET first_name = "John"
SET last_name = "Doe"
SET email = "john.doe@example.com"
SET phone = "555-0123"
SET address = "123 Main Street"
SET city = "San Francisco"
SET state = "CA"
SET zip = "94105"
# Main workflow starts here
GO https://shop.example.com
WAIT `.homepage-loaded` 10
# Step 1: Search and add to cart
search_product
EVAL `console.log('Found', document.querySelectorAll('.product-item').length, 'products')`
add_first_item_to_cart
# Add a second item
CLICK `.product-item:nth-child(2) .add-to-cart`
WAIT 2
# Step 2: Go to checkout
go_to_checkout
# Step 3: Fill customer information
fill_customer_info
# Step 4: Select shipping method
select_shipping
# Step 5: Continue to payment
CLICK `button.continue-to-payment`
WAIT `.payment-section` 10
# Log order summary
EVAL `
const orderTotal = document.querySelector('.order-total')?.textContent;
const itemCount = document.querySelectorAll('.order-item').length;
console.log('=== Order Summary ===');
console.log('Items:', itemCount);
console.log('Total:', orderTotal);
// Get all items
const items = Array.from(document.querySelectorAll('.order-item')).map(item => ({
name: item.querySelector('.item-name')?.textContent,
quantity: item.querySelector('.item-quantity')?.textContent,
price: item.querySelector('.item-price')?.textContent
}));
console.log('Items:', JSON.stringify(items, null, 2));
`
# Note: Stopping here before actual payment submission
EVAL `console.log('Workflow completed - stopped before payment submission')`

View File

@@ -0,0 +1,8 @@
GO https://app.example.com
WAIT `.nav-menu` 8
CLICK `a[href="/products"]`
WAIT 2
CLICK `a[href="/about"]`
WAIT 2
BACK
WAIT 1

View File

@@ -0,0 +1,8 @@
GO https://myapp.com/login
WAIT `input#email` 5
CLICK `input#email`
TYPE "user@example.com"
PRESS Tab
TYPE "password123"
CLICK `button.login-btn`
WAIT `.dashboard` 10

View File

@@ -0,0 +1,7 @@
GO https://responsive.site.com
WAIT 2
IF (`window.innerWidth < 768`) THEN CLICK `.mobile-menu`
IF (`window.innerWidth < 768`) THEN WAIT `.mobile-nav` 3
IF (`window.innerWidth >= 768`) THEN CLICK `.desktop-menu li:nth-child(2)`
REPEAT (CLICK `.next-slide`, 5)
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept-cookies`

View File

@@ -0,0 +1,8 @@
GO https://news.site.com
WAIT `.article-list` 10
SCROLL DOWN 500
WAIT 1
SCROLL DOWN 500
WAIT 1
CLICK `.article:nth-child(5)`
WAIT `.article-content` 5

View File

@@ -0,0 +1,7 @@
GO https://shop.example.com
WAIT `.search-bar` 10
CLICK `.search-bar`
TYPE "wireless headphones"
PRESS Enter
WAIT `.results` 5
CLICK `.product-card:first-child`

View File

@@ -0,0 +1,19 @@
# Simple form submission example
# This script fills out a contact form and submits it
GO https://example.com/contact
WAIT `form#contact-form` 10
# Fill out the form fields
CLICK `input[name="name"]`
TYPE "Alice Smith"
PRESS Tab
TYPE "alice@example.com"
PRESS Tab
TYPE "I'd like to learn more about your services"
# Submit the form
CLICK `button[type="submit"]`
# Wait for success message
WAIT "Thank you for your message" 5

View File

@@ -0,0 +1,11 @@
PROC fill_field
TYPE "test@example.com"
PRESS Tab
ENDPROC
GO https://forms.example.com
WAIT `form` 5
IF (EXISTS `input[type="email"]`) THEN CLICK `input[type="email"]`
IF (EXISTS `input[type="email"]`) THEN fill_field
REPEAT (PRESS Tab, `document.activeElement.type !== 'submit'`)
CLICK `button[type="submit"]`