This commit introduces significant enhancements to the Crawl4AI ecosystem:

Chrome Extension - Script Builder (Alpha):
- Add recording functionality to capture user interactions (clicks, typing, scrolling)
- Implement smart event grouping for cleaner script generation
- Support export to both JavaScript and C4A script formats
- Add timeline view for visualizing and editing recorded actions
- Include wait commands (time-based and element-based)
- Add saved flows functionality for reusing automation scripts
- Update UI with consistent dark terminal theme (Dank Mono font, green/pink accents)
- Release new extension versions: v1.1.0, v1.2.0, v1.2.1

LLM Context Builder Improvements:
- Reorganize context files from llmtxt/ to llm.txt/ with better structure
- Separate diagram templates from text content (diagrams/ and txt/ subdirectories)
- Add comprehensive context files for all major Crawl4AI components
- Improve file naming convention for better discoverability

Documentation Updates:
- Update apps index page to match main documentation theme
- Standardize color scheme: "Available" tags use primary color (#50ffff)
- Change "Coming Soon" tags to dark gray for better visual hierarchy
- Add interactive two-column layout for extension landing page
- Include code examples for both Schema Builder and Script Builder features

Technical Improvements:
- Enhance event capture mechanism with better element selection
- Add support for contenteditable elements and complex form interactions
- Implement proper scroll event handling for both window and element scrolling
- Add meta key support for keyboard shortcuts
- Improve selector generation for more reliable element targeting

The Script Builder is released as Alpha, acknowledging potential bugs while providing early access to this powerful automation recording feature.
504 lines
26 KiB
HTML
504 lines
26 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>Crawl4AI Assistant - Chrome Extension for Visual Web Scraping</title>
|
|
<link rel="stylesheet" href="assistant.css">
|
|
</head>
|
|
<body>
|
|
<div class="terminal-container">
|
|
<div class="header">
|
|
<div class="header-content">
|
|
<div class="logo-section">
|
|
<img src="../../img/favicon-32x32.png" alt="Crawl4AI Logo" class="logo">
|
|
<div>
|
|
<h1>Crawl4AI Assistant</h1>
|
|
<p class="tagline">Chrome Extension for Visual Web Scraping</p>
|
|
</div>
|
|
</div>
|
|
<!-- Primary navigation; labelled so assistive tech can identify this landmark. -->
<nav class="nav-links" aria-label="Primary">
  <a href="../../" class="nav-link">← Back to Docs</a>
  <a href="../" class="nav-link">All Apps</a>
  <!-- rel="noopener noreferrer": the tab opened by target="_blank" gets no window.opener handle back to this page. -->
  <a href="https://github.com/unclecode/crawl4ai" class="nav-link" target="_blank" rel="noopener noreferrer">GitHub</a>
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="content">
|
|
<!-- Video Section -->
|
|
<!-- Demo clip: starts muted and loops; `controls` gives visitors a way to pause or replay it. -->
<section class="video-section" aria-label="Extension demo video">
  <div class="video-wrapper">
    <video autoplay loop muted playsinline controls class="demo-video">
      <source src="demo.mp4" type="video/mp4">
      Your browser does not support the video tag.
    </video>
  </div>
</section>
|
|
|
|
<!-- Introduction -->
|
|
<section class="intro-section">
|
|
<div class="terminal-window">
|
|
<div class="terminal-header">
|
|
<span class="terminal-title">About Crawl4AI Assistant</span>
|
|
</div>
|
|
<div class="terminal-content">
|
|
<p>Transform any website into structured data with just a few clicks! The Crawl4AI Assistant Chrome Extension provides two powerful tools for web scraping and automation.</p>
|
|
|
|
<div class="features-grid">
|
|
<div class="feature-card">
|
|
<span class="feature-icon">🎯</span>
|
|
<h3>Schema Builder</h3>
|
|
<p>Click to select elements and build extraction schemas visually</p>
|
|
</div>
|
|
<div class="feature-card">
|
|
<span class="feature-icon">🔴</span>
|
|
<h3>Script Builder <span style="color: #f380f5; font-size: 0.75rem;">(Alpha)</span></h3>
|
|
<p>Record browser actions to create automation scripts</p>
|
|
</div>
|
|
<div class="feature-card">
|
|
<span class="feature-icon">🐍</span>
|
|
<h3>Python Code</h3>
|
|
<p>Get production-ready Crawl4AI code instantly</p>
|
|
</div>
|
|
<div class="feature-card">
|
|
<span class="feature-icon">🎨</span>
|
|
<h3>Beautiful UI</h3>
|
|
<p>Draggable toolbar with macOS-style interface</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
<!-- Quick Start -->
|
|
<section class="quickstart-section">
|
|
<h2>Quick Start</h2>
|
|
<div class="terminal-window">
|
|
<div class="terminal-header">
|
|
<span class="terminal-title">Installation</span>
|
|
</div>
|
|
<div class="terminal-content">
|
|
<div class="installation-steps">
|
|
<div class="step">
|
|
<span class="step-number">1</span>
|
|
<div class="step-content">
|
|
<h4>Download the Extension</h4>
|
|
<p>Get the latest release from GitHub or use the button below</p>
|
|
<a href="crawl4ai-assistant-v1.2.1.zip" class="download-button" download>
|
|
<span class="button-icon">↓</span>
|
|
Download Extension (v1.2.1)
|
|
</a>
|
|
</div>
|
|
</div>
|
|
<div class="step">
|
|
<span class="step-number">2</span>
|
|
<div class="step-content">
|
|
<h4>Load in Chrome</h4>
|
|
<p>Navigate to <code>chrome://extensions/</code> and enable Developer Mode</p>
|
|
</div>
|
|
</div>
|
|
<div class="step">
|
|
<span class="step-number">3</span>
|
|
<div class="step-content">
|
|
<h4>Load Unpacked</h4>
|
|
<p>Click "Load unpacked" and select the extracted extension folder</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
<!-- Interactive Tools Section -->
|
|
<section class="interactive-tools">
|
|
<h2>Explore Our Tools</h2>
|
|
|
|
<div class="tools-container">
|
|
<!-- Left Panel - Tool Selector -->
|
|
<div class="tools-panel">
|
|
<div class="tool-selector active" data-tool="schema-builder">
|
|
<div class="tool-icon">📊</div>
|
|
<div class="tool-info">
|
|
<h3>Schema Builder</h3>
|
|
<p>Visual data extraction</p>
|
|
</div>
|
|
<div class="tool-status">Available</div>
|
|
</div>
|
|
|
|
<div class="tool-selector" data-tool="script-builder">
|
|
<div class="tool-icon">🔴</div>
|
|
<div class="tool-info">
|
|
<h3>Script Builder</h3>
|
|
<p>Browser automation</p>
|
|
</div>
|
|
<div class="tool-status alpha">Alpha</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Right Panel - Tool Details -->
|
|
<div class="tool-details">
|
|
<!-- Schema Builder Details -->
|
|
<div class="tool-content active" id="schema-builder">
|
|
<div class="tool-header">
|
|
<h3>📊 Schema Builder</h3>
|
|
<span class="tool-tagline">Click to extract data visually</span>
|
|
</div>
|
|
|
|
<div class="tool-steps">
|
|
<div class="step-item">
|
|
<div class="step-number">1</div>
|
|
<div class="step-content">
|
|
<h4>Select Container</h4>
|
|
<p>Click on any repeating element like product cards or articles</p>
|
|
<div class="step-visual">
|
|
<span class="highlight-green">■</span> Elements highlighted in green
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="step-item">
|
|
<div class="step-number">2</div>
|
|
<div class="step-content">
|
|
<h4>Mark Fields</h4>
|
|
<p>Click on data fields inside the container</p>
|
|
<div class="step-visual">
|
|
<span class="highlight-pink">■</span> Fields highlighted in pink
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Schema Builder step 3. The ampersand is written as &amp; so it can never be read as the start of a character reference. -->
<div class="step-item">
  <div class="step-number">3</div>
  <div class="step-content">
    <h4>Generate &amp; Extract</h4>
    <p>Get your CSS selectors and Python code instantly</p>
    <div class="step-visual">
      <span class="highlight-accent">⚡</span> Ready to use code
    </div>
  </div>
</div>
|
|
</div>
|
|
|
|
<div class="tool-features">
|
|
<div class="feature-tag">No CSS knowledge needed</div>
|
|
<div class="feature-tag">Smart selector generation</div>
|
|
<div class="feature-tag">LLM-ready schemas</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Script Builder Details -->
|
|
<div class="tool-content" id="script-builder">
|
|
<div class="tool-header">
|
|
<h3>🔴 Script Builder</h3>
|
|
<span class="tool-tagline">Record actions, generate automation</span>
|
|
</div>
|
|
|
|
<div class="tool-steps">
|
|
<div class="step-item">
|
|
<div class="step-number">1</div>
|
|
<div class="step-content">
|
|
<h4>Hit Record</h4>
|
|
<p>Start capturing your browser interactions</p>
|
|
<div class="step-visual">
|
|
<span class="recording-dot">●</span> Recording indicator
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="step-item">
|
|
<div class="step-number">2</div>
|
|
<div class="step-content">
|
|
<h4>Interact Naturally</h4>
|
|
<p>Click, type, scroll - everything is captured</p>
|
|
<div class="step-visual">
|
|
<span class="action-icon">🖱️</span> <span class="action-icon">⌨️</span> <span class="action-icon">📜</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="step-item">
|
|
<div class="step-number">3</div>
|
|
<div class="step-content">
|
|
<h4>Export Script</h4>
|
|
<p>Get JavaScript for Crawl4AI's js_code parameter</p>
|
|
<div class="step-visual">
|
|
<span class="highlight-accent">📝</span> Automation ready
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="tool-features">
|
|
<div class="feature-tag">Smart action grouping</div>
|
|
<div class="feature-tag">Wait detection</div>
|
|
<div class="feature-tag">Keyboard shortcuts</div>
|
|
<div class="feature-tag alpha-tag">Alpha version</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
<!-- Interactive Code Examples -->
|
|
<section class="code-showcase">
|
|
<h2>See the Generated Code</h2>
|
|
|
|
<div class="code-tabs">
|
|
<button class="code-tab active" data-example="schema">📊 Schema Builder</button>
|
|
<button class="code-tab" data-example="script">🔴 Script Builder</button>
|
|
</div>
|
|
|
|
<div class="code-examples">
|
|
<!-- Schema Builder Code -->
|
|
<div class="code-example active" id="code-schema">
|
|
<div class="terminal-window">
|
|
<div class="terminal-header">
|
|
<span class="terminal-title">schema_extraction.py</span>
|
|
<button class="copy-button" data-code="schema">Copy</button>
|
|
</div>
|
|
<div class="terminal-content">
|
|
<!-- Whitespace inside <pre> is rendered verbatim and is what the Copy button reads via textContent, so the Python indentation below is significant. -->
<pre><code><span class="keyword">import</span> asyncio
<span class="keyword">import</span> json
<span class="keyword">from</span> crawl4ai <span class="keyword">import</span> AsyncWebCrawler, CrawlerRunConfig
<span class="keyword">from</span> crawl4ai.extraction_strategy <span class="keyword">import</span> JsonCssExtractionStrategy

<span class="keyword">async</span> <span class="keyword">def</span> <span class="function">extract_products</span>():
    <span class="comment"># Schema generated from your visual selection</span>
    schema = {
        <span class="string">"name"</span>: <span class="string">"Product Catalog"</span>,
        <span class="string">"baseSelector"</span>: <span class="string">"div.product-card"</span>, <span class="comment"># Container you clicked</span>
        <span class="string">"fields"</span>: [
            {
                <span class="string">"name"</span>: <span class="string">"title"</span>,
                <span class="string">"selector"</span>: <span class="string">"h3.product-title"</span>,
                <span class="string">"type"</span>: <span class="string">"text"</span>
            },
            {
                <span class="string">"name"</span>: <span class="string">"price"</span>,
                <span class="string">"selector"</span>: <span class="string">"span.price"</span>,
                <span class="string">"type"</span>: <span class="string">"text"</span>
            },
            {
                <span class="string">"name"</span>: <span class="string">"image"</span>,
                <span class="string">"selector"</span>: <span class="string">"img.product-img"</span>,
                <span class="string">"type"</span>: <span class="string">"attribute"</span>,
                <span class="string">"attribute"</span>: <span class="string">"src"</span>
            }
        ]
    }

    config = CrawlerRunConfig(
        extraction_strategy=JsonCssExtractionStrategy(schema)
    )

    <span class="keyword">async</span> <span class="keyword">with</span> AsyncWebCrawler() <span class="keyword">as</span> crawler:
        result = <span class="keyword">await</span> crawler.arun(
            url=<span class="string">"https://example.com/products"</span>,
            config=config
        )
        <span class="keyword">return</span> json.loads(result.extracted_content)

asyncio.run(extract_products())</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Script Builder Code -->
|
|
<div class="code-example" id="code-script">
|
|
<div class="terminal-window">
|
|
<div class="terminal-header">
|
|
<span class="terminal-title">automation_script.py</span>
|
|
<button class="copy-button" data-code="script">Copy</button>
|
|
</div>
|
|
<div class="terminal-content">
|
|
<!-- Whitespace inside <pre> is rendered verbatim and is what the Copy button reads via textContent, so the Python indentation below is significant. ">" in the sample is written as &gt;, which renders and copies as the same character. -->
<pre><code><span class="keyword">import</span> asyncio
<span class="keyword">from</span> crawl4ai <span class="keyword">import</span> AsyncWebCrawler, CrawlerRunConfig

<span class="comment"># JavaScript generated from your recorded actions</span>
js_script = <span class="string">"""
// Search for products
document.querySelector('button.search-toggle').click();
await new Promise(r =&gt; setTimeout(r, 500));

// Type search query
const searchInput = document.querySelector('input#search');
searchInput.value = 'wireless headphones';
searchInput.dispatchEvent(new Event('input', {bubbles: true}));

// Submit search
searchInput.dispatchEvent(new KeyboardEvent('keydown', {
    key: 'Enter', keyCode: 13, bubbles: true
}));

// Wait for results
await new Promise(r =&gt; setTimeout(r, 2000));

// Click first product
document.querySelector('.product-item:first-child').click();

// Wait for product page
await new Promise(r =&gt; setTimeout(r, 1000));

// Add to cart
document.querySelector('button.add-to-cart').click();
"""</span>

<span class="keyword">async</span> <span class="keyword">def</span> <span class="function">automate_shopping</span>():
    config = CrawlerRunConfig(
        js_code=js_script,
        wait_for=<span class="string">"css:.cart-confirmation"</span>,
        screenshot=<span class="keyword">True</span>
    )

    <span class="keyword">async</span> <span class="keyword">with</span> AsyncWebCrawler() <span class="keyword">as</span> crawler:
        result = <span class="keyword">await</span> crawler.arun(
            url=<span class="string">"https://shop.example.com"</span>,
            config=config
        )
        <span class="keyword">print</span>(<span class="string">f"✓ Automation complete: {result.url}"</span>)
        <span class="keyword">return</span> result

asyncio.run(automate_shopping())</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
|
|
<!-- Coming Soon Section -->
|
|
<section class="coming-soon-section">
|
|
<h2>Coming Soon: Even More Power</h2>
|
|
<div class="terminal-window">
|
|
<div class="terminal-header">
|
|
<span class="terminal-title">Future Features</span>
|
|
</div>
|
|
<div class="terminal-content">
|
|
<p class="intro-text">We're continuously expanding C4AI Assistant with powerful new features to make web scraping even easier:</p>
|
|
|
|
<div class="coming-features">
|
|
<div class="coming-feature">
|
|
<div class="feature-header">
|
|
<span class="feature-badge">Cloud</span>
|
|
<h3>Run on C4AI Cloud</h3>
|
|
</div>
|
|
<p>Execute your extraction directly in the cloud without setting up any local environment. Just click "Run on Cloud" and get your data instantly.</p>
|
|
<div class="feature-preview">
|
|
<code>☁️ Instant results • Auto-scaling</code>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="coming-feature">
|
|
<div class="feature-header">
|
|
<span class="feature-badge">Direct</span>
|
|
<h3>Get CrawlResult Without Code</h3>
|
|
</div>
|
|
<p>Skip the code generation entirely! Get extracted data directly in the extension as a CrawlResult object, ready to download as JSON.</p>
|
|
<div class="feature-preview">
|
|
<code>📊 One-click extraction • No Python needed • Export to JSON/CSV</code>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="coming-feature">
|
|
<div class="feature-header">
|
|
<span class="feature-badge">AI</span>
|
|
<h3>Smart Schema Suggestions</h3>
|
|
</div>
|
|
<p>AI-powered field detection that automatically suggests the most likely data fields on any page, making schema building even faster.</p>
|
|
<div class="feature-preview">
|
|
<code>🤖 Auto-detect fields • Smart naming • Pattern recognition</code>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Closing call-to-action. rel="noopener noreferrer" keeps the tab opened by target="_blank" from getting a window.opener handle to this page. -->
<div class="stay-tuned">
  <p>🚀 Stay tuned for updates! Follow our <a href="https://github.com/unclecode/crawl4ai" target="_blank" rel="noopener noreferrer">GitHub</a> for the latest releases.</p>
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
<!-- Footer -->
|
|
<footer class="footer">
|
|
<div class="footer-content">
|
|
<div class="footer-section">
|
|
<h4>Resources</h4>
|
|
<ul>
|
|
<li><a href="https://github.com/unclecode/crawl4ai">GitHub Repository</a></li>
|
|
<li><a href="../../">Documentation</a></li>
|
|
<li><a href="https://discord.gg/jP8KfhDhyN">Discord Community</a></li>
|
|
</ul>
|
|
</div>
|
|
<div class="footer-section">
|
|
<h4>Connect</h4>
|
|
<ul>
|
|
<li><a href="https://twitter.com/unclecode">Twitter @unclecode</a></li>
|
|
<li><a href="https://github.com/unclecode">GitHub @unclecode</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<div class="footer-bottom">
|
|
<p>Made with 🚀 by the Crawl4AI team</p>
|
|
</div>
|
|
</footer>
|
|
</div>
|
|
</div>
|
|
|
|
<script>
|
|
// Tool Selector Interaction
|
|
/*
 * Tool selector panel.
 *
 * Activating a `.tool-selector` card marks it `active` and shows the
 * `.tool-content` pane whose id equals the card's `data-tool` value;
 * every other card and pane loses `active`.
 *
 * The cards are plain <div>s, so this also makes them reachable and
 * operable from the keyboard (Tab to focus, Enter/Space to activate) and
 * exposes their state to assistive technology via `aria-pressed`.
 */
document.querySelectorAll('.tool-selector').forEach((selector, _index, allSelectors) => {
    selector.setAttribute('role', 'button');
    selector.setAttribute('tabindex', '0');
    selector.setAttribute('aria-pressed', String(selector.classList.contains('active')));

    const activate = () => {
        const panel = document.getElementById(selector.getAttribute('data-tool'));
        if (!panel) {
            // No matching pane: keep the current selection instead of
            // clearing it and leaving the details panel empty.
            return;
        }

        allSelectors.forEach(other => {
            const isCurrent = other === selector;
            other.classList.toggle('active', isCurrent);
            other.setAttribute('aria-pressed', String(isCurrent));
        });

        document.querySelectorAll('.tool-content').forEach(content => {
            content.classList.toggle('active', content === panel);
        });
    };

    selector.addEventListener('click', activate);

    selector.addEventListener('keydown', event => {
        if (event.key === 'Enter' || event.key === ' ') {
            // Space would otherwise scroll the page.
            event.preventDefault();
            activate();
        }
    });
});
|
|
|
|
// Code Tab Interaction
|
|
/*
 * Code example tabs: clicking a `.code-tab` clears `active` from every tab
 * and every `.code-example`, then re-applies it to the clicked tab and to
 * the element with id "code-" + the tab's `data-example` value.
 */
for (const tab of document.querySelectorAll('.code-tab')) {
    tab.addEventListener('click', () => {
        const deactivate = node => node.classList.remove('active');

        // Reset the whole tab strip and all example panes first.
        document.querySelectorAll('.code-tab').forEach(deactivate);
        document.querySelectorAll('.code-example').forEach(deactivate);

        // Highlight the tab that was clicked.
        tab.classList.add('active');

        // Reveal the pane paired with this tab.
        const paneId = `code-${tab.getAttribute('data-example')}`;
        document.getElementById(paneId).classList.add('active');
    });
}
|
|
|
|
// Copy Button Functionality
|
|
/*
 * Copy buttons.
 *
 * Clicking a `.copy-button` copies the text of the `<pre><code>` inside the
 * element with id "code-" + the button's `data-code` value, then shows
 * "Copied!" (plus the `copied` class) for two seconds.
 *
 * The async Clipboard API is only exposed in secure contexts, so when it is
 * missing the handler falls back to a hidden <textarea> and
 * `document.execCommand('copy')`. Failures are logged and leave the button
 * label unchanged.
 */
document.querySelectorAll('.copy-button').forEach(button => {
    // Handle of the pending "revert label" timer, so repeated clicks
    // restart the two-second window instead of stacking timers.
    let resetTimer = null;

    // Legacy copy path; throws if the browser refuses the command.
    const legacyCopy = text => {
        const scratch = document.createElement('textarea');
        scratch.value = text;
        scratch.setAttribute('readonly', '');
        // Keep the helper element out of view and out of the layout flow.
        scratch.style.position = 'fixed';
        scratch.style.opacity = '0';
        document.body.appendChild(scratch);
        scratch.select();
        try {
            if (!document.execCommand('copy')) {
                throw new Error('Copy command was rejected by the browser');
            }
        } finally {
            scratch.remove();
        }
    };

    button.addEventListener('click', async () => {
        const codeType = button.getAttribute('data-code');
        const container = document.getElementById('code-' + codeType);
        const codeElement = container ? container.querySelector('pre code') : null;

        if (!codeElement) {
            console.error('Failed to copy code:', new Error('No code block found for "' + codeType + '"'));
            return;
        }

        const codeText = codeElement.textContent;

        try {
            if (navigator.clipboard && navigator.clipboard.writeText) {
                await navigator.clipboard.writeText(codeText);
            } else {
                legacyCopy(codeText);
            }
        } catch (err) {
            console.error('Failed to copy code:', err);
            return;
        }

        button.textContent = 'Copied!';
        button.classList.add('copied');

        clearTimeout(resetTimer);
        resetTimer = setTimeout(() => {
            button.textContent = 'Copy';
            button.classList.remove('copied');
            resetTimer = null;
        }, 2000);
    });
});
|
|
</script>
|
|
</body>
|
|
</html> |