feat: add Script Builder to Chrome Extension and reorganize LLM context files
This commit introduces significant enhancements to the Crawl4AI ecosystem: Chrome Extension - Script Builder (Alpha): - Add recording functionality to capture user interactions (clicks, typing, scrolling) - Implement smart event grouping for cleaner script generation - Support export to both JavaScript and C4A script formats - Add timeline view for visualizing and editing recorded actions - Include wait commands (time-based and element-based) - Add saved flows functionality for reusing automation scripts - Update UI with consistent dark terminal theme (Dank Mono font, green/pink accents) - Release new extension versions: v1.1.0, v1.2.0, v1.2.1 LLM Context Builder Improvements: - Reorganize context files from llmtxt/ to llm.txt/ with better structure - Separate diagram templates from text content (diagrams/ and txt/ subdirectories) - Add comprehensive context files for all major Crawl4AI components - Improve file naming convention for better discoverability Documentation Updates: - Update apps index page to match main documentation theme - Standardize color scheme: "Available" tags use primary color (#50ffff) - Change "Coming Soon" tags to dark gray for better visual hierarchy - Add interactive two-column layout for extension landing page - Include code examples for both Schema Builder and Script Builder features Technical Improvements: - Enhance event capture mechanism with better element selection - Add support for contenteditable elements and complex form interactions - Implement proper scroll event handling for both window and element scrolling - Add meta key support for keyboard shortcuts - Improve selector generation for more reliable element targeting The Script Builder is released as Alpha, acknowledging potential bugs while providing early access to this powerful automation recording feature.
This commit is contained in:
@@ -43,23 +43,23 @@
|
||||
<span class="terminal-title">About Crawl4AI Assistant</span>
|
||||
</div>
|
||||
<div class="terminal-content">
|
||||
<p>Transform any website into structured data with just a few clicks! The Crawl4AI Assistant Chrome Extension lets you visually select elements on any webpage and automatically generates Python code for web scraping.</p>
|
||||
<p>Transform any website into structured data with just a few clicks! The Crawl4AI Assistant Chrome Extension provides two powerful tools for web scraping and automation.</p>
|
||||
|
||||
<div class="features-grid">
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">🎯</span>
|
||||
<h3>Visual Selection</h3>
|
||||
<p>Click on any element to select it - no CSS selectors needed</p>
|
||||
<h3>Schema Builder</h3>
|
||||
<p>Click to select elements and build extraction schemas visually</p>
|
||||
</div>
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">📊</span>
|
||||
<h3>Schema Builder</h3>
|
||||
<p>Build extraction schemas by clicking on container and field elements</p>
|
||||
<span class="feature-icon">🔴</span>
|
||||
<h3>Script Builder <span style="color: #f380f5; font-size: 0.75rem;">(Alpha)</span></h3>
|
||||
<p>Record browser actions to create automation scripts</p>
|
||||
</div>
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">🐍</span>
|
||||
<h3>Python Code</h3>
|
||||
<p>Get production-ready Crawl4AI code with LLM extraction</p>
|
||||
<p>Get production-ready Crawl4AI code instantly</p>
|
||||
</div>
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">🎨</span>
|
||||
@@ -85,9 +85,9 @@
|
||||
<div class="step-content">
|
||||
<h4>Download the Extension</h4>
|
||||
<p>Get the latest release from GitHub or use the button below</p>
|
||||
<a href="crawl4ai-assistant-v1.0.1.zip" class="download-button" download>
|
||||
<a href="crawl4ai-assistant-v1.2.1.zip" class="download-button" download>
|
||||
<span class="button-icon">↓</span>
|
||||
Download Extension (v1.0.1)
|
||||
Download Extension (v1.2.1)
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
@@ -110,67 +110,155 @@
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Usage Guide -->
|
||||
<section class="usage-section">
|
||||
<h2>How to Use</h2>
|
||||
<div class="terminal-window">
|
||||
<div class="terminal-header">
|
||||
<span class="terminal-title">Step-by-Step Guide</span>
|
||||
<!-- Interactive Tools Section -->
|
||||
<section class="interactive-tools">
|
||||
<h2>Explore Our Tools</h2>
|
||||
|
||||
<div class="tools-container">
|
||||
<!-- Left Panel - Tool Selector -->
|
||||
<div class="tools-panel">
|
||||
<div class="tool-selector active" data-tool="schema-builder">
|
||||
<div class="tool-icon">📊</div>
|
||||
<div class="tool-info">
|
||||
<h3>Schema Builder</h3>
|
||||
<p>Visual data extraction</p>
|
||||
</div>
|
||||
<div class="tool-status">Available</div>
|
||||
</div>
|
||||
|
||||
<div class="tool-selector" data-tool="script-builder">
|
||||
<div class="tool-icon">🔴</div>
|
||||
<div class="tool-info">
|
||||
<h3>Script Builder</h3>
|
||||
<p>Browser automation</p>
|
||||
</div>
|
||||
<div class="tool-status alpha">Alpha</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="terminal-content">
|
||||
<div class="usage-flow">
|
||||
<div class="usage-step">
|
||||
<div class="usage-header">
|
||||
<span class="usage-icon">1️⃣</span>
|
||||
<h4>Start Schema Builder</h4>
|
||||
</div>
|
||||
<p>Click the extension icon and select "Schema Builder" to begin</p>
|
||||
|
||||
<!-- Right Panel - Tool Details -->
|
||||
<div class="tool-details">
|
||||
<!-- Schema Builder Details -->
|
||||
<div class="tool-content active" id="schema-builder">
|
||||
<div class="tool-header">
|
||||
<h3>📊 Schema Builder</h3>
|
||||
<span class="tool-tagline">Click to extract data visually</span>
|
||||
</div>
|
||||
|
||||
<div class="usage-step">
|
||||
<div class="usage-header">
|
||||
<span class="usage-icon">2️⃣</span>
|
||||
<h4>Select Container</h4>
|
||||
<div class="tool-steps">
|
||||
<div class="step-item">
|
||||
<div class="step-number">1</div>
|
||||
<div class="step-content">
|
||||
<h4>Select Container</h4>
|
||||
<p>Click on any repeating element like product cards or articles</p>
|
||||
<div class="step-visual">
|
||||
<span class="highlight-green">■</span> Elements highlighted in green
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>Click on a container element (e.g., product card, article, listing)</p>
|
||||
<div class="code-snippet">
|
||||
<span class="comment"># Container will be highlighted in green</span>
|
||||
|
||||
<div class="step-item">
|
||||
<div class="step-number">2</div>
|
||||
<div class="step-content">
|
||||
<h4>Mark Fields</h4>
|
||||
<p>Click on data fields inside the container</p>
|
||||
<div class="step-visual">
|
||||
<span class="highlight-pink">■</span> Fields highlighted in pink
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="step-item">
|
||||
<div class="step-number">3</div>
|
||||
<div class="step-content">
|
||||
<h4>Generate &amp; Extract</h4>
|
||||
<p>Get your CSS selectors and Python code instantly</p>
|
||||
<div class="step-visual">
|
||||
<span class="highlight-accent">⚡</span> Ready to use code
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="usage-step">
|
||||
<div class="usage-header">
|
||||
<span class="usage-icon">3️⃣</span>
|
||||
<h4>Select Fields</h4>
|
||||
<div class="tool-features">
|
||||
<div class="feature-tag">No CSS knowledge needed</div>
|
||||
<div class="feature-tag">Smart selector generation</div>
|
||||
<div class="feature-tag">LLM-ready schemas</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Script Builder Details -->
|
||||
<div class="tool-content" id="script-builder">
|
||||
<div class="tool-header">
|
||||
<h3>🔴 Script Builder</h3>
|
||||
<span class="tool-tagline">Record actions, generate automation</span>
|
||||
</div>
|
||||
|
||||
<div class="tool-steps">
|
||||
<div class="step-item">
|
||||
<div class="step-number">1</div>
|
||||
<div class="step-content">
|
||||
<h4>Hit Record</h4>
|
||||
<p>Start capturing your browser interactions</p>
|
||||
<div class="step-visual">
|
||||
<span class="recording-dot">●</span> Recording indicator
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>Click on individual fields inside the container and name them</p>
|
||||
<div class="code-snippet">
|
||||
<span class="comment"># Fields will be highlighted in pink</span>
|
||||
<span class="comment"># Examples: title, price, description, image</span>
|
||||
|
||||
<div class="step-item">
|
||||
<div class="step-number">2</div>
|
||||
<div class="step-content">
|
||||
<h4>Interact Naturally</h4>
|
||||
<p>Click, type, scroll - everything is captured</p>
|
||||
<div class="step-visual">
|
||||
<span class="action-icon">🖱️</span> <span class="action-icon">⌨️</span> <span class="action-icon">📜</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="step-item">
|
||||
<div class="step-number">3</div>
|
||||
<div class="step-content">
|
||||
<h4>Export Script</h4>
|
||||
<p>Get JavaScript for Crawl4AI's js_code parameter</p>
|
||||
<div class="step-visual">
|
||||
<span class="highlight-accent">📝</span> Automation ready
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="usage-step">
|
||||
<div class="usage-header">
|
||||
<span class="usage-icon">4️⃣</span>
|
||||
<h4>Generate Code</h4>
|
||||
</div>
|
||||
<p>Click "Stop &amp; Generate" to create your Python extraction code</p>
|
||||
<div class="tool-features">
|
||||
<div class="feature-tag">Smart action grouping</div>
|
||||
<div class="feature-tag">Wait detection</div>
|
||||
<div class="feature-tag">Keyboard shortcuts</div>
|
||||
<div class="feature-tag alpha-tag">Alpha version</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Generated Code Example -->
|
||||
<section class="code-section">
|
||||
<h2>Generated Code Example</h2>
|
||||
<div class="terminal-window">
|
||||
<div class="terminal-header">
|
||||
<span class="terminal-title">example_extraction.py</span>
|
||||
</div>
|
||||
<div class="terminal-content">
|
||||
<pre><code><span class="keyword">import</span> asyncio
|
||||
<!-- Interactive Code Examples -->
|
||||
<section class="code-showcase">
|
||||
<h2>See the Generated Code</h2>
|
||||
|
||||
<div class="code-tabs">
|
||||
<button class="code-tab active" data-example="schema">📊 Schema Builder</button>
|
||||
<button class="code-tab" data-example="script">🔴 Script Builder</button>
|
||||
</div>
|
||||
|
||||
<div class="code-examples">
|
||||
<!-- Schema Builder Code -->
|
||||
<div class="code-example active" id="code-schema">
|
||||
<div class="terminal-window">
|
||||
<div class="terminal-header">
|
||||
<span class="terminal-title">schema_extraction.py</span>
|
||||
<button class="copy-button" data-code="schema">Copy</button>
|
||||
</div>
|
||||
<div class="terminal-content">
|
||||
<pre><code><span class="keyword">import</span> asyncio
|
||||
<span class="keyword">import</span> json
|
||||
<span class="keyword">from</span> crawl4ai <span class="keyword">import</span> AsyncWebCrawler, CrawlerRunConfig
|
||||
<span class="keyword">from</span> crawl4ai.extraction_strategy <span class="keyword">import</span> JsonCssExtractionStrategy
|
||||
@@ -191,51 +279,94 @@
|
||||
<span class="string">"selector"</span>: <span class="string">"span.price"</span>,
|
||||
<span class="string">"type"</span>: <span class="string">"text"</span>
|
||||
},
|
||||
{
|
||||
<span class="string">"name"</span>: <span class="string">"description"</span>,
|
||||
<span class="string">"selector"</span>: <span class="string">"p.description"</span>,
|
||||
<span class="string">"type"</span>: <span class="string">"text"</span>
|
||||
},
|
||||
{
|
||||
<span class="string">"name"</span>: <span class="string">"image"</span>,
|
||||
<span class="string">"selector"</span>: <span class="string">"img.product-image"</span>,
|
||||
<span class="string">"selector"</span>: <span class="string">"img.product-img"</span>,
|
||||
<span class="string">"type"</span>: <span class="string">"attribute"</span>,
|
||||
<span class="string">"attribute"</span>: <span class="string">"src"</span>
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
<span class="comment"># Create extraction strategy</span>
|
||||
extraction_strategy = JsonCssExtractionStrategy(schema, verbose=<span class="keyword">True</span>)
|
||||
|
||||
<span class="comment"># Configure the crawler</span>
|
||||
config = CrawlerRunConfig(
|
||||
extraction_strategy=extraction_strategy
|
||||
extraction_strategy=JsonCssExtractionStrategy(schema)
|
||||
)
|
||||
|
||||
|
||||
<span class="keyword">async</span> <span class="keyword">with</span> AsyncWebCrawler() <span class="keyword">as</span> crawler:
|
||||
result = <span class="keyword">await</span> crawler.arun(
|
||||
url=<span class="string">"https://example.com/products"</span>,
|
||||
config=config
|
||||
)
|
||||
|
||||
<span class="comment"># Parse the extracted data</span>
|
||||
products = json.loads(result.extracted_content)
|
||||
<span class="keyword">print</span>(<span class="string">f"Extracted {len(products)} products"</span>)
|
||||
|
||||
<span class="comment"># Display first product</span>
|
||||
<span class="keyword">if</span> products:
|
||||
<span class="keyword">print</span>(json.dumps(products[0], indent=2))
|
||||
|
||||
<span class="keyword">return</span> products
|
||||
<span class="keyword">return</span> json.loads(result.extracted_content)
|
||||
|
||||
<span class="comment"># Run the extraction</span>
|
||||
<span class="keyword">if</span> __name__ == <span class="string">"__main__"</span>:
|
||||
asyncio.run(extract_products())</code></pre>
|
||||
asyncio.run(extract_products())</code></pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Script Builder Code -->
|
||||
<div class="code-example" id="code-script">
|
||||
<div class="terminal-window">
|
||||
<div class="terminal-header">
|
||||
<span class="terminal-title">automation_script.py</span>
|
||||
<button class="copy-button" data-code="script">Copy</button>
|
||||
</div>
|
||||
<div class="terminal-content">
|
||||
<pre><code><span class="keyword">import</span> asyncio
|
||||
<span class="keyword">from</span> crawl4ai <span class="keyword">import</span> AsyncWebCrawler, CrawlerRunConfig
|
||||
|
||||
<span class="comment"># JavaScript generated from your recorded actions</span>
|
||||
js_script = <span class="string">"""
|
||||
// Search for products
|
||||
document.querySelector('button.search-toggle').click();
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
|
||||
// Type search query
|
||||
const searchInput = document.querySelector('input#search');
|
||||
searchInput.value = 'wireless headphones';
|
||||
searchInput.dispatchEvent(new Event('input', {bubbles: true}));
|
||||
|
||||
// Submit search
|
||||
searchInput.dispatchEvent(new KeyboardEvent('keydown', {
|
||||
key: 'Enter', keyCode: 13, bubbles: true
|
||||
}));
|
||||
|
||||
// Wait for results
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
|
||||
// Click first product
|
||||
document.querySelector('.product-item:first-child').click();
|
||||
|
||||
// Wait for product page
|
||||
await new Promise(r => setTimeout(r, 1000));
|
||||
|
||||
// Add to cart
|
||||
document.querySelector('button.add-to-cart').click();
|
||||
"""</span>
|
||||
|
||||
<span class="keyword">async</span> <span class="keyword">def</span> <span class="function">automate_shopping</span>():
|
||||
config = CrawlerRunConfig(
|
||||
js_code=js_script,
|
||||
wait_for=<span class="string">"css:.cart-confirmation"</span>,
|
||||
screenshot=<span class="keyword">True</span>
|
||||
)
|
||||
|
||||
<span class="keyword">async</span> <span class="keyword">with</span> AsyncWebCrawler() <span class="keyword">as</span> crawler:
|
||||
result = <span class="keyword">await</span> crawler.arun(
|
||||
url=<span class="string">"https://shop.example.com"</span>,
|
||||
config=config
|
||||
)
|
||||
<span class="keyword">print</span>(<span class="string">f"✓ Automation complete: {result.url}"</span>)
|
||||
<span class="keyword">return</span> result
|
||||
|
||||
asyncio.run(automate_shopping())</code></pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
<!-- Coming Soon Section -->
|
||||
<section class="coming-soon-section">
|
||||
<h2>Coming Soon: Even More Power</h2>
|
||||
@@ -279,17 +410,6 @@
|
||||
<code>🤖 Auto-detect fields • Smart naming • Pattern recognition</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="coming-feature">
|
||||
<div class="feature-header">
|
||||
<span class="feature-badge">Script</span>
|
||||
<h3>C4A Script Builder</h3>
|
||||
</div>
|
||||
<p>Visual automation script builder for complex interactions - fill forms, click buttons, handle pagination, all without writing code.</p>
|
||||
<div class="feature-preview">
|
||||
<code>🎯 Visual automation • Record &amp; replay • Export as C4A script</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="stay-tuned">
|
||||
@@ -324,5 +444,61 @@
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Tool Selector Interaction
// Every .tool-selector switches the visible .tool-content panel: the panel whose
// id equals the selector's data-tool attribute becomes the only active one.
// The selectors are plain <div>s, so they are neither focusable nor operable
// from the keyboard by default; they are exposed here as toggle buttons and
// respond to Enter/Space in addition to click.
document.querySelectorAll('.tool-selector').forEach(selector => {
    // Activate this selector and reveal its panel.
    const activate = () => {
        const toolId = selector.getAttribute('data-tool');
        const panel = toolId ? document.getElementById(toolId) : null;

        // Resolve the target before touching any state: clearing first and
        // failing afterwards would leave the page with no visible panel.
        if (!panel) {
            console.warn('No tool content found for selector:', toolId);
            return;
        }

        // Remove active state from all selectors and panels
        document.querySelectorAll('.tool-selector').forEach(s => {
            s.classList.remove('active');
            s.setAttribute('aria-pressed', 'false');
        });
        document.querySelectorAll('.tool-content').forEach(c => c.classList.remove('active'));

        // Mark the chosen selector and show the corresponding content
        selector.classList.add('active');
        selector.setAttribute('aria-pressed', 'true');
        panel.classList.add('active');
    };

    // Make the <div> reachable and announce its state to assistive technology.
    selector.setAttribute('role', 'button');
    selector.setAttribute('tabindex', '0');
    selector.setAttribute('aria-pressed', String(selector.classList.contains('active')));

    selector.addEventListener('click', activate);
    selector.addEventListener('keydown', event => {
        if (event.key === 'Enter' || event.key === ' ') {
            // Space would otherwise scroll the page.
            event.preventDefault();
            activate();
        }
    });
});
|
||||
|
||||
// Code Tab Interaction
// Clicking a .code-tab makes it the only active tab and shows the
// .code-example whose id is "code-" + the tab's data-example value.
for (const codeTab of document.querySelectorAll('.code-tab')) {
    codeTab.addEventListener('click', event => {
        const clickedTab = event.currentTarget;

        // Reset: no tab and no example stays active
        for (const otherTab of document.querySelectorAll('.code-tab')) {
            otherTab.classList.remove('active');
        }
        for (const example of document.querySelectorAll('.code-example')) {
            example.classList.remove('active');
        }

        // Highlight the tab that was clicked
        clickedTab.classList.add('active');

        // Reveal the matching code example
        const targetId = 'code-' + clickedTab.getAttribute('data-example');
        const targetExample = document.getElementById(targetId);
        targetExample.classList.add('active');
    });
}
|
||||
|
||||
// Copy Button Functionality
// Each .copy-button copies the text of the <pre><code> inside the element
// with id "code-" + its data-code value, then briefly changes its label to
// report the outcome before reverting to "Copy" after two seconds.
document.querySelectorAll('.copy-button').forEach(button => {
    // Handle of the pending label reset, so repeated clicks restart the
    // two-second window instead of letting an older timer reset the label early.
    let resetTimer;

    // Show a temporary label (optionally with the "copied" style), then revert.
    const flash = (label, copied) => {
        clearTimeout(resetTimer);
        button.textContent = label;
        button.classList.toggle('copied', copied);

        resetTimer = setTimeout(() => {
            button.textContent = 'Copy';
            button.classList.remove('copied');
        }, 2000);
    };

    button.addEventListener('click', async () => {
        const codeType = button.getAttribute('data-code');

        try {
            // The lookup lives inside the try block so that a missing code
            // block is reported like any other failure rather than surfacing
            // as an unhandled promise rejection.
            const container = document.getElementById('code-' + codeType);
            const codeElement = container ? container.querySelector('pre code') : null;
            if (!codeElement) {
                throw new Error('No code block found for "' + codeType + '"');
            }

            // navigator.clipboard is undefined outside secure contexts; the
            // resulting TypeError is handled by the catch below.
            await navigator.clipboard.writeText(codeElement.textContent);
            flash('Copied!', true);
        } catch (err) {
            console.error('Failed to copy code:', err);
            // Tell the user as well; a console message alone is invisible to them.
            flash('Copy failed', false);
        }
    });
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user