Refactor Crawl4AI Assistant: Rename Schema Builder to Click2Crawl, update UI elements, and remove deprecated files

- Updated overlay.css to add gap in titlebar.
- Deleted schemaBuilder_v1.js and associated zip files (v1.0.0 to v1.2.0).
- Modified index.html to reflect new Click2Crawl feature and updated descriptions.
- Updated manifest.json to include new JavaScript files for Click2Crawl and markdown extraction.
- Refined popup styles and HTML to align with new feature names and functionalities.
- Enhanced user instructions and tooltips to guide users on the new Click2Crawl and Markdown Extraction features.
This commit is contained in:
UncleCode
2025-06-10 15:40:26 +08:00
parent 0ac12da9f3
commit 4eb90b41b6
16 changed files with 589 additions and 762 deletions

View File

@@ -20,7 +20,8 @@
"Bash(docker logs:*)",
"Bash(curl:*)",
"Bash(docker compose:*)",
"Bash(./test-final-integration.sh:*)"
"Bash(./test-final-integration.sh:*)",
"Bash(mv:*)"
]
},
"enableAllProjectMcpServers": false

View File

@@ -1,14 +1,15 @@
# Crawl4AI Chrome Extension
Visual schema and script builder for Crawl4AI - Build extraction schemas by clicking on webpage elements!
Visual extraction tools for Crawl4AI - Click to extract data and content from any webpage!
## 🚀 Features
- **Visual Schema Builder**: Click on elements to build extraction schemas
- **Click2Crawl**: Click on elements to build data extraction schemas instantly
- **Markdown Extraction**: Select elements and export as clean markdown
- **Script Builder (Alpha)**: Record browser actions to create automation scripts
- **Smart Element Selection**: Container and field selection with visual feedback
- **Code Generation**: Generates complete Python code with LLM integration
- **Code Generation**: Generates complete Python code for Crawl4AI
- **Beautiful Dark UI**: Consistent with Crawl4AI's design language
- **One-Click Download**: Get your generated code instantly
## 📦 Installation
@@ -33,11 +34,11 @@ If you want proper icons:
## 🎯 How to Use
### Building a Schema
### Using Click2Crawl
1. **Navigate to any website** you want to extract data from
2. **Click the Crawl4AI extension icon** in your toolbar
3. **Click "Schema Builder"** to start the capture mode
3. **Click "Click2Crawl"** to start the capture mode
4. **Select a container element**:
- Hover over elements (they'll highlight in blue)
- Click on a repeating container (e.g., product card, article block)
@@ -45,9 +46,9 @@ If you want proper icons:
- Elements will now highlight in green
- Click on each piece of data you want to extract
- Name each field (e.g., "title", "price", "description")
6. **Generate the code**:
- Click "Generate Code" in the extension popup
- A Python file will automatically download
6. **Test and Export**:
- Click "Test Schema" to see extracted data instantly
- Export as Python code, JSON schema, or markdown
### Running the Generated Code

View File

@@ -1,15 +1,16 @@
// Enhanced SchemaBuilder class for Crawl4AI Chrome Extension
// Click2Crawl class for Crawl4AI Chrome Extension
// Click elements to build extraction schemas
// Singleton instance to prevent multiple toolbars
let schemaBuilderInstance = null;
let click2CrawlInstance = null;
class SchemaBuilder {
class Click2Crawl {
constructor() {
// Prevent multiple instances
if (schemaBuilderInstance) {
schemaBuilderInstance.stop();
if (click2CrawlInstance) {
click2CrawlInstance.stop();
}
schemaBuilderInstance = this;
click2CrawlInstance = this;
this.container = null;
this.fields = [];
@@ -57,9 +58,15 @@ class SchemaBuilder {
this.inspectingFields = false;
this.parentLevels = 1;
// Clean up markdown preview modal
if (this.markdownPreviewModal) {
this.markdownPreviewModal.destroy();
this.markdownPreviewModal = null;
}
// Clear singleton reference
if (schemaBuilderInstance === this) {
schemaBuilderInstance = null;
if (click2CrawlInstance === this) {
click2CrawlInstance = null;
}
}
@@ -97,8 +104,8 @@ class SchemaBuilder {
<button class="c4ai-dot c4ai-dot-minimize"></button>
<button class="c4ai-dot c4ai-dot-maximize"></button>
</div>
<img src="${chrome.runtime.getURL('icons/icon-16.png')}" class="c4ai-titlebar-icon" alt="Crawl4AI">
<div class="c4ai-titlebar-title">🔧 Schema Builder</div>
<div class="c4ai-titlebar-title"> Click2Crawl</div>
<img src="${chrome.runtime.getURL('icons/icon-16.png')}" class="c4ai-titlebar-icon" alt="Crawl4AI" style="margin-left: auto;">
</div>
<div class="c4ai-toolbar-content">
<div class="c4ai-toolbar-status">
@@ -151,6 +158,9 @@ class SchemaBuilder {
<button id="c4ai-export-data" class="c4ai-action-btn c4ai-export-btn" disabled>
<span>📊</span> Data
</button>
<button id="c4ai-export-markdown" class="c4ai-action-btn c4ai-export-btn" disabled>
<span>📝</span> Markdown
</button>
</div>
</div>
@@ -202,6 +212,7 @@ class SchemaBuilder {
addClickHandler('c4ai-test', () => this.testSchema());
addClickHandler('c4ai-export-schema', () => this.exportSchema());
addClickHandler('c4ai-export-data', () => this.exportData());
addClickHandler('c4ai-export-markdown', () => this.exportMarkdown());
addClickHandler('c4ai-deploy-cloud', () => this.deployToCloud());
addClickHandler('c4ai-close', () => this.stop());
@@ -273,11 +284,16 @@ class SchemaBuilder {
handleClick(e) {
const element = e.target;
// Check if clicking on our UI elements
// Check if clicking on our UI elements (including markdown preview modal)
if (this.isOurElement(element)) {
return; // Let toolbar clicks work normally
}
// Additional check for markdown preview modal classes
if (element.closest('.c4ai-c2c-preview') || element.closest('.c4ai-preview-options')) {
return; // Don't interfere with markdown preview modal
}
// Use current element
const targetElement = this.currentElement || element;
@@ -303,7 +319,9 @@ class SchemaBuilder {
isOurElement(element) {
return window.C4AI_Utils.isOurElement(element) ||
(this.selectedBox && element === this.selectedBox);
(this.selectedBox && element === this.selectedBox) ||
(this.markdownPreviewModal && this.markdownPreviewModal.modal &&
(element === this.markdownPreviewModal.modal || this.markdownPreviewModal.modal.contains(element)));
}
showSelectedBox(element) {
@@ -499,6 +517,9 @@ class SchemaBuilder {
}
showFieldDialog(element) {
// Remove any existing field dialogs first
document.querySelectorAll('.c4ai-field-dialog').forEach(d => d.remove());
const dialog = document.createElement('div');
dialog.className = 'c4ai-field-dialog';
@@ -922,6 +943,7 @@ class SchemaBuilder {
document.getElementById('c4ai-test').disabled = false;
document.getElementById('c4ai-export-schema').disabled = false;
document.getElementById('c4ai-export-data').disabled = false;
document.getElementById('c4ai-export-markdown').disabled = false;
document.getElementById('c4ai-deploy-cloud').disabled = false;
} else {
schemaSection.style.display = 'none';
@@ -976,6 +998,9 @@ class SchemaBuilder {
const field = this.fields[index];
if (!field) return;
// Remove any existing field dialogs first
document.querySelectorAll('.c4ai-field-dialog').forEach(d => d.remove());
// Re-show the field dialog with existing values
const dialog = document.createElement('div');
dialog.className = 'c4ai-field-dialog';
@@ -1476,6 +1501,137 @@ class SchemaBuilder {
await this.testSchema();
}
async exportMarkdown() {
// Initialize markdown converter if not already done
if (!this.markdownConverter) {
this.markdownConverter = new MarkdownConverter();
}
if (!this.contentAnalyzer) {
this.contentAnalyzer = new ContentAnalyzer();
}
// Initialize markdown preview modal if not already done
if (!this.markdownPreviewModal) {
this.markdownPreviewModal = new MarkdownPreviewModal();
}
// Get all matching containers
const containers = document.querySelectorAll(this.container.selector);
if (containers.length === 0) {
this.showNotification('No matching containers found', 'error');
return;
}
// Show modal with callback to generate markdown
this.markdownPreviewModal.show(async (options) => {
return await this.generateMarkdownFromSchema(options);
});
}
async generateMarkdownFromSchema(options) {
// Get all matching containers
const containers = document.querySelectorAll(this.container.selector);
const markdownParts = [];
for (let i = 0; i < containers.length; i++) {
const container = containers[i];
// Add XPath header if enabled
if (options.includeXPath) {
const xpath = this.getXPath(container);
markdownParts.push(`### Container ${i + 1} - XPath: \`${xpath}\`\n`);
}
// Extract data based on schema fields
const extractedData = {};
this.fields.forEach(field => {
try {
const element = container.querySelector(field.selector);
if (element) {
if (field.type === 'text') {
extractedData[field.name] = element.textContent.trim();
} else if (field.type === 'attribute' && field.attribute) {
extractedData[field.name] = element.getAttribute(field.attribute);
}
}
} catch (e) {
// Skip invalid selectors
}
});
// Convert container to markdown based on options
const analysis = await this.contentAnalyzer.analyze([container]);
const containerMarkdown = await this.markdownConverter.convert([container], {
...options,
analysis,
extractedData // Pass extracted data for context
});
// Trim the markdown before adding
const trimmedMarkdown = containerMarkdown.trim();
markdownParts.push(trimmedMarkdown);
// Add separator if enabled and not last element
if (options.addSeparators && i < containers.length - 1) {
markdownParts.push('\n---\n');
}
}
return markdownParts.join('\n');
}
getXPath(element) {
if (element.id) {
return `//*[@id="${element.id}"]`;
}
const parts = [];
let current = element;
while (current && current.nodeType === Node.ELEMENT_NODE) {
let index = 0;
let sibling = current.previousSibling;
while (sibling) {
if (sibling.nodeType === Node.ELEMENT_NODE && sibling.nodeName === current.nodeName) {
index++;
}
sibling = sibling.previousSibling;
}
const tagName = current.nodeName.toLowerCase();
const part = index > 0 ? `${tagName}[${index + 1}]` : tagName;
parts.unshift(part);
current = current.parentNode;
}
return '/' + parts.join('/');
}
showNotification(message, type = 'success') {
const notification = document.createElement('div');
notification.className = `c4ai-notification c4ai-notification-${type}`;
notification.textContent = message;
document.body.appendChild(notification);
// Animate in
setTimeout(() => notification.classList.add('show'), 10);
// Remove after 3 seconds
setTimeout(() => {
notification.classList.remove('show');
setTimeout(() => notification.remove(), 300);
}, 3000);
}
deployToCloud() {
// Create cloud deployment modal
const modal = document.createElement('div');
@@ -1808,5 +1964,5 @@ if __name__ == "__main__":
// Export for use in content script
if (typeof window !== 'undefined') {
window.SchemaBuilder = SchemaBuilder;
window.Click2Crawl = Click2Crawl;
}

View File

@@ -1,5 +1,5 @@
// Main content script for Crawl4AI Assistant
// Coordinates between SchemaBuilder and ScriptBuilder
// Coordinates between Click2Crawl, ScriptBuilder, and MarkdownExtraction
let activeBuilder = null;
@@ -13,8 +13,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
}
if (request.mode === 'schema') {
console.log('Starting Schema Builder');
activeBuilder = new SchemaBuilder();
console.log('Starting Click2Crawl');
activeBuilder = new Click2Crawl();
activeBuilder.start();
} else if (request.mode === 'script') {
console.log('Starting Script Builder');
@@ -34,8 +34,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
activeBuilder.deactivate?.();
activeBuilder = null;
}
console.log('Starting Schema Builder');
activeBuilder = new SchemaBuilder();
console.log('Starting Click2Crawl');
activeBuilder = new Click2Crawl();
activeBuilder.start();
sendResponse({ success: true });
} else if (request.action === 'startScriptCapture') {
@@ -52,8 +52,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
activeBuilder.deactivate?.();
activeBuilder = null;
}
console.log('Starting Click2Crawl');
activeBuilder = new Click2CrawlBuilder();
console.log('Starting Markdown Extraction');
activeBuilder = new MarkdownExtraction();
sendResponse({ success: true });
} else if (request.action === 'generateCode') {
if (activeBuilder && activeBuilder.generateCode) {

View File

@@ -1,26 +1,14 @@
class Click2CrawlBuilder {
class MarkdownExtraction {
constructor() {
this.selectedElements = new Set();
this.highlightBoxes = new Map();
this.selectionMode = false;
this.toolbar = null;
this.previewPanel = null;
this.markdownPreviewModal = null;
this.selectionCounter = 0;
this.markdownConverter = null;
this.contentAnalyzer = null;
// Configuration options
this.options = {
includeImages: true,
preserveTables: true,
keepCodeFormatting: true,
simplifyLayout: false,
preserveLinks: true,
addSeparators: true,
includeXPath: false,
textOnly: false
};
this.init();
}
@@ -44,7 +32,7 @@ class Click2CrawlBuilder {
<span class="c4ai-dot c4ai-dot-yellow"></span>
<span class="c4ai-dot c4ai-dot-green"></span>
</div>
<span class="c4ai-toolbar-title">Click2Crawl</span>
<span class="c4ai-toolbar-title">Markdown Extraction</span>
<button class="c4ai-close-btn" title="Close">×</button>
</div>
<div class="c4ai-toolbar-content">
@@ -363,19 +351,18 @@ class Click2CrawlBuilder {
}
async showPreview() {
// Generate markdown from selected elements
const markdown = await this.generateMarkdown();
// Create or update preview panel
if (!this.previewPanel) {
this.createPreviewPanel();
// Initialize markdown preview modal if not already done
if (!this.markdownPreviewModal) {
this.markdownPreviewModal = new MarkdownPreviewModal();
}
await this.updatePreviewContent(markdown);
this.previewPanel.style.display = 'block';
// Show modal with callback to generate markdown
this.markdownPreviewModal.show(async (options) => {
return await this.generateMarkdown(options);
});
}
createPreviewPanel() {
/* createPreviewPanel() {
this.previewPanel = document.createElement('div');
this.previewPanel.className = 'c4ai-c2c-preview';
this.previewPanel.innerHTML = `
@@ -425,9 +412,9 @@ class Click2CrawlBuilder {
this.previewPanel.style.zIndex = '999999';
this.setupPreviewEventListeners();
}
} */
setupPreviewEventListeners() {
/* setupPreviewEventListeners() {
// Close button
this.previewPanel.querySelector('.c4ai-preview-close').addEventListener('click', () => {
this.previewPanel.style.display = 'none';
@@ -496,9 +483,9 @@ class Click2CrawlBuilder {
this.previewPanel.querySelector('.c4ai-download-btn').addEventListener('click', () => {
this.downloadMarkdown();
});
}
} */
switchPreviewTab(tabName) {
/* switchPreviewTab(tabName) {
// Update active tab
this.previewPanel.querySelectorAll('.c4ai-tab').forEach(tab => {
tab.classList.toggle('active', tab.dataset.tab === tabName);
@@ -508,9 +495,9 @@ class Click2CrawlBuilder {
this.previewPanel.querySelectorAll('.c4ai-preview-pane').forEach(pane => {
pane.classList.toggle('active', pane.dataset.pane === tabName);
});
}
} */
async updatePreviewContent(markdown) {
/* async updatePreviewContent(markdown) {
// Update markdown pane
const markdownPane = this.previewPanel.querySelector('[data-pane="markdown"]');
markdownPane.innerHTML = `<pre><code>${this.escapeHtml(markdown)}</code></pre>`;
@@ -535,19 +522,19 @@ class Click2CrawlBuilder {
// Fallback if marked.js is not available
previewPane.innerHTML = `<div class="c4ai-markdown-preview"><pre>${this.escapeHtml(markdown)}</pre></div>`;
}
}
} */
escapeHtml(unsafe) {
/* escapeHtml(unsafe) {
return unsafe
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&#039;");
}
} */
async generateMarkdown() {
async generateMarkdown(options) {
// Get selected elements as array
const elements = Array.from(this.selectedElements);
@@ -565,7 +552,7 @@ class Click2CrawlBuilder {
const element = sortedElements[i];
// Add XPath header if enabled
if (this.options.includeXPath) {
if (options.includeXPath) {
const xpath = this.getXPath(element);
markdownParts.push(`### Element ${i + 1} - XPath: \`${xpath}\`\n`);
}
@@ -574,7 +561,7 @@ class Click2CrawlBuilder {
let elementsToConvert = [element];
// If text-only mode and element is a TR, process the entire table for better context
if (this.options.textOnly && element.tagName === 'TR') {
if (options.textOnly && element.tagName === 'TR') {
const table = element.closest('table');
if (table && !sortedElements.includes(table)) {
// Only include this table row, not the whole table
@@ -585,19 +572,21 @@ class Click2CrawlBuilder {
// Analyze and convert individual element
const analysis = await this.contentAnalyzer.analyze(elementsToConvert);
const markdown = await this.markdownConverter.convert(elementsToConvert, {
...this.options,
...options,
analysis
});
markdownParts.push(markdown.trim());
// Trim the markdown before adding
const trimmedMarkdown = markdown.trim();
markdownParts.push(trimmedMarkdown);
// Add separator if enabled and not last element
if (this.options.addSeparators && i < sortedElements.length - 1) {
markdownParts.push('\n\n---\n\n');
if (options.addSeparators && i < sortedElements.length - 1) {
markdownParts.push('\n---\n');
}
}
return markdownParts.join('\n\n');
return markdownParts.join('\n');
}
getXPath(element) {
@@ -642,35 +631,15 @@ class Click2CrawlBuilder {
}
async copyToClipboard() {
const markdown = await this.generateMarkdown();
try {
await navigator.clipboard.writeText(markdown);
this.showNotification('Markdown copied to clipboard!');
} catch (err) {
console.error('Failed to copy:', err);
this.showNotification('Failed to copy. Please try again.', 'error');
if (this.markdownPreviewModal) {
await this.markdownPreviewModal.copyToClipboard();
}
}
async downloadMarkdown() {
const markdown = await this.generateMarkdown();
const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
const filename = `crawl4ai-export-${timestamp}.md`;
// Create blob and download
const blob = new Blob([markdown], { type: 'text/markdown' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = filename;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
this.showNotification(`Downloaded ${filename}`);
if (this.markdownPreviewModal) {
await this.markdownPreviewModal.downloadMarkdown();
}
}
showNotification(message, type = 'success') {
@@ -707,9 +676,9 @@ class Click2CrawlBuilder {
this.toolbar = null;
}
if (this.previewPanel) {
this.previewPanel.remove();
this.previewPanel = null;
if (this.markdownPreviewModal) {
this.markdownPreviewModal.destroy();
this.markdownPreviewModal = null;
}
// Remove hover styles
@@ -726,7 +695,7 @@ class Click2CrawlBuilder {
}
} catch (error) {
// Extension context might be invalidated, ignore the error
console.log('Click2Crawl deactivated (extension context unavailable)');
console.log('Markdown Extraction deactivated (extension context unavailable)');
}
}
}

View File

@@ -0,0 +1,300 @@
// Shared Markdown Preview Modal Component for Crawl4AI Assistant
// Used by both Click2Crawl and MarkdownExtraction
/**
 * Markdown preview modal for the Crawl4AI Assistant content scripts.
 *
 * The owner passes a callback to show(); the modal calls it with the current
 * option set whenever markdown has to be (re)generated, renders the result in
 * a "Preview" and a "Markdown" pane, and offers copy/download actions.
 */
class MarkdownPreviewModal {
  /**
   * @param {Object} [options] - Overrides merged over the default markdown options.
   */
  constructor(options = {}) {
    this.modal = null;
    this.markdownOptions = {
      includeImages: true,
      preserveTables: true,
      keepCodeFormatting: true,
      simplifyLayout: false,
      preserveLinks: true,
      addSeparators: true,
      includeXPath: false,
      textOnly: false,
      ...options
    };
    this.onGenerateMarkdown = null;
    this.currentMarkdown = '';
    // Incremented by every updateContent() call so that a slow, older render
    // can detect that a newer one has been requested and discard its result.
    this.renderToken = 0;
  }

  /**
   * Show the modal, creating it on first use, and render initial content.
   *
   * @param {function(Object): (string|Promise<string>)} generateMarkdownCallback
   *   Called with the current markdown options; resolves to the markdown text.
   */
  show(generateMarkdownCallback) {
    this.onGenerateMarkdown = generateMarkdownCallback;

    if (!this.modal) {
      this.createModal();
    }

    // Generate initial markdown. Not awaited on purpose: updateContent()
    // handles its own errors and the modal should appear immediately.
    this.updateContent();
    this.modal.style.display = 'block';
  }

  /** Hide the modal without destroying it. */
  hide() {
    if (this.modal) {
      this.modal.style.display = 'none';
    }
  }

  /** Build the modal DOM, attach it to the page and wire up its controls. */
  createModal() {
    this.modal = document.createElement('div');
    this.modal.className = 'c4ai-c2c-preview';
    this.modal.innerHTML = `
      <div class="c4ai-preview-header">
        <div class="c4ai-toolbar-dots">
          <span class="c4ai-dot c4ai-dot-red"></span>
          <span class="c4ai-dot c4ai-dot-yellow"></span>
          <span class="c4ai-dot c4ai-dot-green"></span>
        </div>
        <span class="c4ai-preview-title">Markdown Preview</span>
        <button class="c4ai-preview-close">×</button>
      </div>
      <div class="c4ai-preview-options">
        <label><input type="checkbox" name="textOnly"> 👁️ Visual Text Mode (As You See)</label>
        <label><input type="checkbox" name="includeImages" checked> Include Images</label>
        <label><input type="checkbox" name="preserveTables" checked> Preserve Tables</label>
        <label><input type="checkbox" name="preserveLinks" checked> Preserve Links</label>
        <label><input type="checkbox" name="keepCodeFormatting" checked> Keep Code Formatting</label>
        <label><input type="checkbox" name="simplifyLayout"> Simplify Layout</label>
        <label><input type="checkbox" name="addSeparators" checked> Add Separators</label>
        <label><input type="checkbox" name="includeXPath"> Include XPath Headers</label>
      </div>
      <div class="c4ai-preview-content">
        <div class="c4ai-preview-tabs">
          <button class="c4ai-tab active" data-tab="preview">Preview</button>
          <button class="c4ai-tab" data-tab="markdown">Markdown</button>
          <button class="c4ai-wrap-toggle" title="Toggle word wrap">↔️ Wrap</button>
        </div>
        <div class="c4ai-preview-pane active" data-pane="preview"></div>
        <div class="c4ai-preview-pane" data-pane="markdown"></div>
      </div>
      <div class="c4ai-preview-actions">
        <button class="c4ai-download-btn">Download .md</button>
        <button class="c4ai-copy-markdown-btn">Copy Markdown</button>
        <button class="c4ai-cloud-btn" disabled>Send to Cloud (Coming Soon)</button>
      </div>
    `;

    document.body.appendChild(this.modal);

    // Make modal draggable
    if (window.C4AI_Utils && window.C4AI_Utils.makeDraggable) {
      window.C4AI_Utils.makeDraggable(this.modal);
    }

    // Position preview modal
    this.modal.style.position = 'fixed';
    this.modal.style.top = '50%';
    this.modal.style.left = '50%';
    this.modal.style.transform = 'translate(-50%, -50%)';
    this.modal.style.zIndex = '999999';

    this.setupEventListeners();

    // The markup above hard-codes the default checked state; bring it in line
    // with options given to the constructor or to setOptions() before the
    // modal existed, so the UI shows what will actually be used.
    this.syncCheckboxes();
  }

  /** Attach click/change handlers to the modal's controls. */
  setupEventListeners() {
    // Close button
    this.modal.querySelector('.c4ai-preview-close').addEventListener('click', () => {
      this.hide();
    });

    // Tab switching
    this.modal.querySelectorAll('.c4ai-tab').forEach((tab) => {
      tab.addEventListener('click', () => {
        this.switchTab(tab.dataset.tab);
      });
    });

    // Wrap toggle
    const wrapToggle = this.modal.querySelector('.c4ai-wrap-toggle');
    wrapToggle.addEventListener('click', () => {
      this.modal.querySelectorAll('.c4ai-preview-pane').forEach((pane) => {
        pane.classList.toggle('wrap');
      });
      wrapToggle.classList.toggle('active');
    });

    // Options change
    this.modal.querySelectorAll('input[type="checkbox"]').forEach((checkbox) => {
      checkbox.addEventListener('change', async (e) => {
        this.markdownOptions[e.target.name] = e.target.checked;

        if (e.target.name === 'textOnly') {
          this.applyTextOnlyDependencies(e.target.checked);
        }

        // Update markdown content
        await this.updateContent();
      });
    });

    // Action buttons
    this.modal.querySelector('.c4ai-copy-markdown-btn').addEventListener('click', () => {
      this.copyToClipboard();
    });

    this.modal.querySelector('.c4ai-download-btn').addEventListener('click', () => {
      this.downloadMarkdown();
    });
  }

  /**
   * Apply the checkbox dependencies of text-only mode.
   *
   * Enabling text-only unchecks and disables "Preserve Links" (also clearing
   * the option) and disables "Include Images"; disabling it re-enables both
   * checkboxes without restoring their previous values.
   *
   * @param {boolean} textOnly - New state of the text-only option.
   */
  applyTextOnlyDependencies(textOnly) {
    const preserveLinksCheckbox = this.modal.querySelector('input[name="preserveLinks"]');
    if (preserveLinksCheckbox) {
      if (textOnly) {
        preserveLinksCheckbox.checked = false;
        this.markdownOptions.preserveLinks = false;
      }
      preserveLinksCheckbox.disabled = textOnly;
    }

    const includeImagesCheckbox = this.modal.querySelector('input[name="includeImages"]');
    if (includeImagesCheckbox) {
      includeImagesCheckbox.disabled = textOnly;
    }
  }

  /**
   * Copy boolean values from markdownOptions onto the checkboxes that share
   * their name. Does nothing while the modal has not been created.
   */
  syncCheckboxes() {
    if (!this.modal) return;

    this.modal.querySelectorAll('input[type="checkbox"]').forEach((checkbox) => {
      const value = this.markdownOptions[checkbox.name];
      if (typeof value === 'boolean') {
        checkbox.checked = value;
      }
    });
  }

  /**
   * Activate the tab and pane whose data attribute equals tabName.
   *
   * @param {string} tabName - 'preview' or 'markdown'.
   */
  switchTab(tabName) {
    // Update active tab
    this.modal.querySelectorAll('.c4ai-tab').forEach((tab) => {
      tab.classList.toggle('active', tab.dataset.tab === tabName);
    });

    // Update active pane
    this.modal.querySelectorAll('.c4ai-preview-pane').forEach((pane) => {
      pane.classList.toggle('active', pane.dataset.pane === tabName);
    });
  }

  /**
   * Regenerate the markdown with the current options and refresh both panes.
   *
   * No-op when no callback is registered or the modal does not exist. When
   * several calls overlap, only the most recent one is rendered; a call whose
   * modal was destroyed while it was waiting is dropped as well.
   *
   * @returns {Promise<void>}
   */
  async updateContent() {
    if (!this.onGenerateMarkdown || !this.modal) return;

    const token = ++this.renderToken;

    try {
      // Generate markdown with current options
      const markdown = await this.onGenerateMarkdown(this.markdownOptions);

      // A newer request started, or destroy() ran, while we were waiting.
      if (token !== this.renderToken || !this.modal) return;

      this.currentMarkdown = String(markdown ?? '');

      // Update markdown pane
      const markdownPane = this.modal.querySelector('[data-pane="markdown"]');
      markdownPane.innerHTML = `<pre><code>${this.escapeHtml(this.currentMarkdown)}</code></pre>`;

      // Update preview pane
      const previewPane = this.modal.querySelector('[data-pane="preview"]');

      // Use marked.js if available
      const markedLib = typeof window !== 'undefined' ? window.marked : undefined;
      if (markedLib) {
        markedLib.setOptions({
          gfm: true,
          breaks: true,
          tables: true,
          headerIds: false,
          mangle: false
        });

        // NOTE(review): the rendered HTML is inserted without sanitization and
        // the markdown originates from page content - confirm the converter
        // cannot pass raw HTML through, or sanitize here.
        const html = markedLib.parse(this.currentMarkdown);
        previewPane.innerHTML = `<div class="c4ai-markdown-preview">${html}</div>`;
      } else {
        // Fallback
        previewPane.innerHTML = `<div class="c4ai-markdown-preview"><pre>${this.escapeHtml(this.currentMarkdown)}</pre></div>`;
      }
    } catch (error) {
      console.error('Error generating markdown:', error);
      this.showNotification('Error generating markdown', 'error');
    }
  }

  /**
   * Copy the most recently generated markdown to the clipboard.
   *
   * @returns {Promise<void>}
   */
  async copyToClipboard() {
    try {
      await navigator.clipboard.writeText(this.currentMarkdown);
      this.showNotification('Markdown copied to clipboard!');
    } catch (err) {
      console.error('Failed to copy:', err);
      this.showNotification('Failed to copy. Please try again.', 'error');
    }
  }

  /**
   * Download the most recently generated markdown as a timestamped .md file.
   *
   * @returns {Promise<void>}
   */
  async downloadMarkdown() {
    // ISO timestamp with ':' and '.' replaced by '-' and the last 5 chars cut
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
    const filename = `crawl4ai-export-${timestamp}.md`;

    // Create blob and download through a temporary anchor element
    const blob = new Blob([this.currentMarkdown], { type: 'text/markdown' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);

    this.showNotification(`Downloaded ${filename}`);
  }

  /**
   * Show a transient notification element appended to document.body.
   *
   * @param {string} message - Text to display.
   * @param {string} [type='success'] - Suffix for the `c4ai-notification-*` class.
   */
  showNotification(message, type = 'success') {
    const notification = document.createElement('div');
    notification.className = `c4ai-notification c4ai-notification-${type}`;
    notification.textContent = message;

    document.body.appendChild(notification);

    // Animate in
    setTimeout(() => notification.classList.add('show'), 10);

    // Remove after 3 seconds
    setTimeout(() => {
      notification.classList.remove('show');
      setTimeout(() => notification.remove(), 300);
    }, 3000);
  }

  /**
   * Escape the five HTML-significant characters (& < > " ').
   *
   * @param {*} unsafe - Value to escape; null/undefined become '' and other
   *   non-strings are converted with String().
   * @returns {string} The escaped text.
   */
  escapeHtml(unsafe) {
    return String(unsafe ?? '')
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#039;");
  }

  /**
   * Get current options.
   *
   * @returns {Object} A shallow copy of the markdown options.
   */
  getOptions() {
    return { ...this.markdownOptions };
  }

  /**
   * Update options programmatically and mirror them onto the checkboxes.
   * Does not regenerate the content.
   *
   * @param {Object} options - Option values to merge over the current ones.
   */
  setOptions(options) {
    this.markdownOptions = { ...this.markdownOptions, ...options };

    // Sync from the merged options rather than building a selector per key,
    // which would throw for keys that are not valid inside a CSS selector.
    this.syncCheckboxes();
  }

  /** Cleanup: remove the modal from the page and drop the callback. */
  destroy() {
    if (this.modal) {
      this.modal.remove();
      this.modal = null;
    }
    this.onGenerateMarkdown = null;
  }
}
// Export for use in other scripts: when a `window` global exists, expose the
// class on it as window.MarkdownPreviewModal.
if (typeof window !== 'undefined') {
window.MarkdownPreviewModal = MarkdownPreviewModal;
}

View File

@@ -127,6 +127,7 @@
/* macOS-style titlebar */
.c4ai-toolbar-titlebar {
gap: 1em;
display: flex;
align-items: center;
padding: 12px;

View File

@@ -1,608 +0,0 @@
// SchemaBuilder class for Crawl4AI Chrome Extension
class SchemaBuilder {
constructor() {
this.mode = null;
this.container = null;
this.fields = [];
this.overlay = null;
this.toolbar = null;
this.highlightBox = null;
this.selectedElements = new Set();
this.isPaused = false;
this.codeModal = null;
this.handleMouseMove = this.handleMouseMove.bind(this);
this.handleClick = this.handleClick.bind(this);
this.handleKeyPress = this.handleKeyPress.bind(this);
}
start() {
this.mode = 'container';
this.createOverlay();
this.createToolbar();
this.attachEventListeners();
this.updateToolbar();
}
stop() {
this.detachEventListeners();
this.overlay?.remove();
this.toolbar?.remove();
this.highlightBox?.remove();
this.removeAllHighlights();
this.mode = null;
this.container = null;
this.fields = [];
this.selectedElements.clear();
}
createOverlay() {
// Create highlight box
this.highlightBox = document.createElement('div');
this.highlightBox.className = 'c4ai-highlight-box';
document.body.appendChild(this.highlightBox);
}
createToolbar() {
this.toolbar = document.createElement('div');
this.toolbar.className = 'c4ai-toolbar';
this.toolbar.innerHTML = `
<div class="c4ai-toolbar-titlebar">
<div class="c4ai-titlebar-dots">
<button class="c4ai-dot c4ai-dot-close" id="c4ai-close"></button>
<button class="c4ai-dot c4ai-dot-minimize"></button>
<button class="c4ai-dot c4ai-dot-maximize"></button>
</div>
<img src="${chrome.runtime.getURL('icons/icon-16.png')}" class="c4ai-titlebar-icon" alt="Crawl4AI">
<div class="c4ai-titlebar-title">Crawl4AI Schema Builder</div>
</div>
<div class="c4ai-toolbar-content">
<div class="c4ai-toolbar-status">
<div class="c4ai-status-item">
<span class="c4ai-status-label">Mode:</span>
<span class="c4ai-status-value" id="c4ai-mode">Select Container</span>
</div>
<div class="c4ai-status-item">
<span class="c4ai-status-label">Container:</span>
<span class="c4ai-status-value" id="c4ai-container">Not selected</span>
</div>
</div>
<div class="c4ai-fields-list" id="c4ai-fields-list" style="display: none;">
<div class="c4ai-fields-header">Selected Fields:</div>
<ul class="c4ai-fields-items" id="c4ai-fields-items"></ul>
</div>
<div class="c4ai-toolbar-hint" id="c4ai-hint">
Click on a container element (e.g., product card, article, etc.)
</div>
<div class="c4ai-toolbar-actions">
<button id="c4ai-pause" class="c4ai-action-btn c4ai-pause-btn">
<span class="c4ai-pause-icon">⏸</span> Pause
</button>
<button id="c4ai-generate" class="c4ai-action-btn c4ai-generate-btn">
<span class="c4ai-generate-icon">⚡</span> Generate Code
</button>
</div>
</div>
`;
document.body.appendChild(this.toolbar);
// Add event listeners for toolbar buttons
document.getElementById('c4ai-pause').addEventListener('click', () => this.togglePause());
document.getElementById('c4ai-generate').addEventListener('click', () => this.stopAndGenerate());
document.getElementById('c4ai-close').addEventListener('click', () => this.stop());
// Make toolbar draggable
window.C4AI_Utils.makeDraggable(this.toolbar);
}
attachEventListeners() {
document.addEventListener('mousemove', this.handleMouseMove, true);
document.addEventListener('click', this.handleClick, true);
document.addEventListener('keydown', this.handleKeyPress, true);
}
detachEventListeners() {
document.removeEventListener('mousemove', this.handleMouseMove, true);
document.removeEventListener('click', this.handleClick, true);
document.removeEventListener('keydown', this.handleKeyPress, true);
}
handleMouseMove(e) {
if (this.isPaused) return;
const element = document.elementFromPoint(e.clientX, e.clientY);
if (element && !this.isOurElement(element)) {
this.highlightElement(element);
}
}
handleClick(e) {
if (this.isPaused) return;
const element = e.target;
if (this.isOurElement(element)) {
return;
}
e.preventDefault();
e.stopPropagation();
if (this.mode === 'container') {
this.selectContainer(element);
} else if (this.mode === 'field') {
this.selectField(element);
}
}
handleKeyPress(e) {
if (e.key === 'Escape') {
this.stop();
}
}
isOurElement(element) {
return window.C4AI_Utils.isOurElement(element);
}
togglePause() {
this.isPaused = !this.isPaused;
const pauseBtn = document.getElementById('c4ai-pause');
if (this.isPaused) {
pauseBtn.innerHTML = '<span class="c4ai-play-icon">▶</span> Resume';
pauseBtn.classList.add('c4ai-paused');
this.highlightBox.style.display = 'none';
} else {
pauseBtn.innerHTML = '<span class="c4ai-pause-icon">⏸</span> Pause';
pauseBtn.classList.remove('c4ai-paused');
}
}
stopAndGenerate() {
if (!this.container || this.fields.length === 0) {
alert('Please select a container and at least one field before generating code.');
return;
}
const code = this.generateCode();
this.showCodeModal(code);
}
highlightElement(element) {
const rect = element.getBoundingClientRect();
this.highlightBox.style.cssText = `
left: ${rect.left + window.scrollX}px;
top: ${rect.top + window.scrollY}px;
width: ${rect.width}px;
height: ${rect.height}px;
display: block;
`;
if (this.mode === 'container') {
this.highlightBox.className = 'c4ai-highlight-box c4ai-container-mode';
} else {
this.highlightBox.className = 'c4ai-highlight-box c4ai-field-mode';
}
}
selectContainer(element) {
// Remove previous container highlight
if (this.container) {
this.container.element.classList.remove('c4ai-selected-container');
}
this.container = {
element: element,
html: element.outerHTML,
selector: this.generateSelector(element),
tagName: element.tagName.toLowerCase()
};
element.classList.add('c4ai-selected-container');
this.mode = 'field';
this.updateToolbar();
this.updateStats();
}
selectField(element) {
// Don't select the container itself
if (element === this.container.element) {
return;
}
// Check if already selected - if so, deselect it
if (this.selectedElements.has(element)) {
this.deselectField(element);
return;
}
// Must be inside the container
if (!this.container.element.contains(element)) {
return;
}
this.showFieldDialog(element);
}
deselectField(element) {
// Remove from fields array
this.fields = this.fields.filter(f => f.element !== element);
// Remove from selected elements set
this.selectedElements.delete(element);
// Remove visual selection
element.classList.remove('c4ai-selected-field');
// Update UI
this.updateToolbar();
this.updateStats();
}
showFieldDialog(element) {
const dialog = document.createElement('div');
dialog.className = 'c4ai-field-dialog';
const rect = element.getBoundingClientRect();
dialog.style.cssText = `
left: ${rect.left + window.scrollX}px;
top: ${rect.bottom + window.scrollY + 10}px;
`;
dialog.innerHTML = `
<div class="c4ai-field-dialog-content">
<h4>Name this field:</h4>
<input type="text" id="c4ai-field-name" placeholder="e.g., title, price, description" autofocus>
<div class="c4ai-field-preview">
<strong>Content:</strong> ${element.textContent.trim().substring(0, 50)}...
</div>
<div class="c4ai-field-actions">
<button id="c4ai-field-save">Save</button>
<button id="c4ai-field-cancel">Cancel</button>
</div>
</div>
`;
document.body.appendChild(dialog);
const input = dialog.querySelector('#c4ai-field-name');
const saveBtn = dialog.querySelector('#c4ai-field-save');
const cancelBtn = dialog.querySelector('#c4ai-field-cancel');
const save = () => {
const fieldName = input.value.trim();
if (fieldName) {
this.fields.push({
name: fieldName,
value: element.textContent.trim(),
element: element,
selector: this.generateSelector(element, this.container.element)
});
element.classList.add('c4ai-selected-field');
this.selectedElements.add(element);
this.updateToolbar();
this.updateStats();
}
dialog.remove();
};
const cancel = () => {
dialog.remove();
};
saveBtn.addEventListener('click', save);
cancelBtn.addEventListener('click', cancel);
input.addEventListener('keypress', (e) => {
if (e.key === 'Enter') save();
if (e.key === 'Escape') cancel();
});
input.focus();
}
generateSelector(element, context = document) {
// Try to generate a robust selector
if (element.id) {
return `#${CSS.escape(element.id)}`;
}
// Check for data attributes (most stable)
const dataAttrs = ['data-testid', 'data-id', 'data-test', 'data-cy'];
for (const attr of dataAttrs) {
const value = element.getAttribute(attr);
if (value) {
return `[${attr}="${value}"]`;
}
}
// Check for aria-label
if (element.getAttribute('aria-label')) {
return `[aria-label="${element.getAttribute('aria-label')}"]`;
}
// Try semantic HTML elements with text
const tagName = element.tagName.toLowerCase();
if (['button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) {
const text = element.textContent.trim();
if (text && text.length < 50) {
// Use tag name with partial text match
return `${tagName}`;
}
}
// Check for simple, non-utility classes
const classes = Array.from(element.classList)
.filter(c => !c.startsWith('c4ai-')) // Exclude our classes
.filter(c => !c.includes('[') && !c.includes('(') && !c.includes(':')) // Exclude utility classes
.filter(c => c.length < 30); // Exclude very long classes
if (classes.length > 0 && classes.length <= 3) {
const selector = classes.map(c => `.${CSS.escape(c)}`).join('');
try {
if (context.querySelectorAll(selector).length === 1) {
return selector;
}
} catch (e) {
// Invalid selector, continue
}
}
// Use nth-child with simple parent tag
const parent = element.parentElement;
if (parent && parent !== context) {
const siblings = Array.from(parent.children);
const index = siblings.indexOf(element) + 1;
// Just use parent tag name to avoid recursion
const parentTag = parent.tagName.toLowerCase();
return `${parentTag} > ${tagName}:nth-child(${index})`;
}
// Final fallback
return tagName;
}
updateToolbar() {
document.getElementById('c4ai-mode').textContent =
this.mode === 'container' ? 'Select Container' : 'Select Fields';
document.getElementById('c4ai-container').textContent =
this.container ? `${this.container.tagName}` : 'Not selected';
// Update fields list
const fieldsList = document.getElementById('c4ai-fields-list');
const fieldsItems = document.getElementById('c4ai-fields-items');
if (this.fields.length > 0) {
fieldsList.style.display = 'block';
fieldsItems.innerHTML = this.fields.map(field => `
<li class="c4ai-field-item">
<span class="c4ai-field-name">${field.name}</span>
<span class="c4ai-field-value">${field.value.substring(0, 30)}${field.value.length > 30 ? '...' : ''}</span>
</li>
`).join('');
} else {
fieldsList.style.display = 'none';
}
const hint = document.getElementById('c4ai-hint');
if (this.mode === 'container') {
hint.textContent = 'Click on a container element (e.g., product card, article, etc.)';
} else if (this.fields.length === 0) {
hint.textContent = 'Click on fields inside the container to extract (title, price, etc.)';
} else {
hint.innerHTML = `Continue selecting fields or click <strong>Stop & Generate</strong> to finish.`;
}
}
updateStats() {
chrome.runtime.sendMessage({
action: 'updateStats',
stats: {
container: !!this.container,
fields: this.fields.length
}
});
}
removeAllHighlights() {
document.querySelectorAll('.c4ai-selected-container').forEach(el => {
el.classList.remove('c4ai-selected-container');
});
document.querySelectorAll('.c4ai-selected-field').forEach(el => {
el.classList.remove('c4ai-selected-field');
});
}
generateCode() {
const fieldDescriptions = this.fields.map(f =>
`- ${f.name} (example: "${f.value.substring(0, 50)}...")`
).join('\n');
return `#!/usr/bin/env python3
"""
Generated by Crawl4AI Chrome Extension
URL: ${window.location.href}
Generated: ${new Date().toISOString()}
"""
import asyncio
import json
from pathlib import Path
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
# HTML snippet of the selected container element
HTML_SNIPPET = """
${this.container.html}
"""
# Extraction query based on your field selections
EXTRACTION_QUERY = """
Create a JSON CSS extraction schema to extract the following fields:
${fieldDescriptions}
The schema should handle multiple ${this.container.tagName} elements on the page.
Each item should be extracted as a separate object in the results array.
"""
async def generate_schema():
"""Generate extraction schema using LLM"""
print("🔧 Generating extraction schema...")
try:
# Generate the schema using Crawl4AI's built-in LLM integration
schema = JsonCssExtractionStrategy.generate_schema(
html=HTML_SNIPPET,
query=EXTRACTION_QUERY,
)
# Save the schema for reuse
schema_path = Path('generated_schema.json')
with open(schema_path, 'w') as f:
json.dump(schema, f, indent=2)
print("✅ Schema generated successfully!")
print(f"📄 Schema saved to: {schema_path}")
print("\\nGenerated schema:")
print(json.dumps(schema, indent=2))
return schema
except Exception as e:
print(f"❌ Error generating schema: {e}")
return None
async def test_extraction(url: str = "${window.location.href}"):
"""Test the generated schema on the actual webpage"""
print("\\n🧪 Testing extraction on live webpage...")
# Load the generated schema
try:
with open('generated_schema.json', 'r') as f:
schema = json.load(f)
except FileNotFoundError:
print("❌ Schema file not found. Run generate_schema() first.")
return
# Configure browser
browser_config = BrowserConfig(
headless=True,
verbose=False
)
# Configure extraction
crawler_config = CrawlerRunConfig(
extraction_strategy=JsonCssExtractionStrategy(schema=schema)
)
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(
url=url,
config=crawler_config
)
if result.success and result.extracted_content:
data = json.loads(result.extracted_content)
print(f"\\n✅ Successfully extracted {len(data)} items!")
# Save results
with open('extracted_data.json', 'w') as f:
json.dump(data, f, indent=2)
# Show sample results
print("\\n📊 Sample results (first 2 items):")
for i, item in enumerate(data[:2], 1):
print(f"\\nItem {i}:")
for key, value in item.items():
print(f" {key}: {value}")
else:
print("❌ Extraction failed:", result.error_message)
if __name__ == "__main__":
# Step 1: Generate the schema from HTML snippet
asyncio.run(generate_schema())
# Step 2: Test extraction on the live webpage
# Uncomment the line below to test extraction:
# asyncio.run(test_extraction())
print("\\n🎯 Next steps:")
print("1. Review the generated schema in 'generated_schema.json'")
print("2. Uncomment the test_extraction() line to test on the live site")
print("3. Use the schema in your Crawl4AI projects!")
`;
return code;
}
showCodeModal(code) {
// Create modal
this.codeModal = document.createElement('div');
this.codeModal.className = 'c4ai-code-modal';
this.codeModal.innerHTML = `
<div class="c4ai-code-modal-content">
<div class="c4ai-code-modal-header">
<h2>Generated Python Code</h2>
<button class="c4ai-close-modal" id="c4ai-close-modal">✕</button>
</div>
<div class="c4ai-code-modal-body">
<pre class="c4ai-code-block"><code class="language-python">${window.C4AI_Utils.escapeHtml(code)}</code></pre>
</div>
<div class="c4ai-code-modal-footer">
<button class="c4ai-action-btn c4ai-cloud-btn" id="c4ai-run-cloud" disabled>
<span>☁️</span> Run on C4AI Cloud (Coming Soon)
</button>
<button class="c4ai-action-btn c4ai-download-btn" id="c4ai-download-code">
<span>⬇</span> Download Code
</button>
<button class="c4ai-action-btn c4ai-copy-btn" id="c4ai-copy-code">
<span>📋</span> Copy to Clipboard
</button>
</div>
</div>
`;
document.body.appendChild(this.codeModal);
// Add event listeners
document.getElementById('c4ai-close-modal').addEventListener('click', () => {
this.codeModal.remove();
this.codeModal = null;
// Don't stop the capture session
});
document.getElementById('c4ai-download-code').addEventListener('click', () => {
chrome.runtime.sendMessage({
action: 'downloadCode',
code: code,
filename: `crawl4ai_schema_${Date.now()}.py`
}, (response) => {
if (response && response.success) {
const btn = document.getElementById('c4ai-download-code');
const originalHTML = btn.innerHTML;
btn.innerHTML = '<span>✓</span> Downloaded!';
setTimeout(() => {
btn.innerHTML = originalHTML;
}, 2000);
} else {
console.error('Download failed:', response?.error);
alert('Download failed. Please check your browser settings.');
}
});
});
document.getElementById('c4ai-copy-code').addEventListener('click', () => {
navigator.clipboard.writeText(code).then(() => {
const btn = document.getElementById('c4ai-copy-code');
btn.innerHTML = '<span>✓</span> Copied!';
setTimeout(() => {
btn.innerHTML = '<span>📋</span> Copy to Clipboard';
}, 2000);
});
});
// Apply syntax highlighting
window.C4AI_Utils.applySyntaxHighlighting(this.codeModal.querySelector('.language-python'));
}
}

View File

@@ -61,14 +61,14 @@
<p>Transform any website into structured data with just a few clicks! The Crawl4AI Assistant Chrome Extension provides three powerful tools for web scraping and data extraction.</p>
<div style="background: #0fbbaa; color: #070708; padding: 12px 16px; border-radius: 8px; margin: 16px 0; font-weight: 600;">
🎉 NEW: Schema Builder now extracts data INSTANTLY without any LLM! Test your schema and see JSON results immediately in the browser!
🎉 NEW: Click2Crawl extracts data INSTANTLY without any LLM! Test your schema and see JSON results immediately in the browser!
</div>
<div class="features-grid">
<div class="feature-card">
<span class="feature-icon">🎯</span>
<h3>Schema Builder</h3>
<p>Extract data instantly without LLMs - see results in real-time!</p>
<h3>Click2Crawl</h3>
<p>Visual data extraction - click elements to build schemas instantly!</p>
</div>
<div class="feature-card">
<span class="feature-icon">🔴</span>
@@ -77,8 +77,8 @@
</div>
<div class="feature-card">
<span class="feature-icon">📝</span>
<h3>Click2Crawl <span style="color: #0fbbaa; font-size: 0.75rem;">(New!)</span></h3>
<p>Select multiple elements to extract clean markdown "as you see"</p>
<h3>Markdown Extraction <span style="color: #0fbbaa; font-size: 0.75rem;">(New!)</span></h3>
<p>Convert any webpage content to clean markdown with Visual Text Mode</p>
</div>
<!-- <div class="feature-card">
<span class="feature-icon">🐍</span>
@@ -104,9 +104,9 @@
<div class="step-content">
<h4>Download the Extension</h4>
<p>Get the latest release from GitHub or use the button below</p>
<a href="crawl4ai-assistant-v1.2.1.zip" class="download-button" download>
<a href="crawl4ai-assistant-v1.3.0.zip" class="download-button" download>
<span class="button-icon"></span>
Download Extension (v1.2.1)
Download Extension (v1.3.0)
</a>
</div>
</div>
@@ -136,10 +136,10 @@
<div class="tools-container">
<!-- Left Panel - Tool Selector -->
<div class="tools-panel">
<div class="tool-selector active" data-tool="schema-builder">
<div class="tool-icon">📊</div>
<div class="tool-selector active" data-tool="click2crawl">
<div class="tool-icon">🎯</div>
<div class="tool-info">
<h3>Schema Builder</h3>
<h3>Click2Crawl</h3>
<p>Visual data extraction</p>
</div>
<div class="tool-status">Available</div>
@@ -154,11 +154,11 @@
<div class="tool-status alpha">Alpha</div>
</div>
<div class="tool-selector" data-tool="click2crawl">
<div class="tool-selector" data-tool="markdown-extraction">
<div class="tool-icon">📝</div>
<div class="tool-info">
<h3>Click2Crawl</h3>
<p>Markdown extraction</p>
<h3>Markdown Extraction</h3>
<p>Content to markdown</p>
</div>
<div class="tool-status new">New!</div>
</div>
@@ -166,11 +166,11 @@
<!-- Right Panel - Tool Details -->
<div class="tool-details">
<!-- Schema Builder Details -->
<div class="tool-content active" id="schema-builder">
<!-- Click2Crawl Details -->
<div class="tool-content active" id="click2crawl">
<div class="tool-header">
<h3>📊 Schema Builder</h3>
<span class="tool-tagline">No LLM needed - Extract data instantly!</span>
<h3>🎯 Click2Crawl</h3>
<span class="tool-tagline">Click elements to build extraction schemas - No LLM needed!</span>
</div>
<div class="tool-steps">
@@ -199,8 +199,8 @@
<div class="step-item">
<div class="step-number">3</div>
<div class="step-content">
<h4>Test & Extract Data NOW!</h4>
<p>🎉 Click "Test Schema" to extract ALL matching data instantly - no coding required!</p>
<h4>Test & Extract Data Instantly!</h4>
<p>🎉 Click "Test Schema" to see extracted JSON immediately - no LLM or coding required!</p>
<div class="step-visual">
<span class="highlight-accent"></span> See extracted JSON immediately
</div>
@@ -210,11 +210,12 @@
<div class="tool-features">
<div class="feature-tag">🚀 Zero LLM dependency</div>
<div class="feature-tag">📊 Instant data extraction</div>
<div class="feature-tag">🎯 Smart selector generation</div>
<div class="feature-tag">🐍 Ready-to-run Python code</div>
<div class="feature-tag">Preview matching elements</div>
<div class="feature-tag">📥 Download JSON results</div>
<div class="feature-tag">📊 Instant JSON extraction</div>
<div class="feature-tag">🎯 Visual element selection</div>
<div class="feature-tag">🐍 Export Python code</div>
<div class="feature-tag">Live preview</div>
<div class="feature-tag">📥 Download results</div>
<div class="feature-tag">📝 Export to markdown</div>
</div>
</div>
@@ -268,11 +269,11 @@
</div>
</div>
<!-- Click2Crawl Details -->
<div class="tool-content" id="click2crawl">
<!-- Markdown Extraction Details -->
<div class="tool-content" id="markdown-extraction">
<div class="tool-header">
<h3>📝 Click2Crawl</h3>
<span class="tool-tagline">Select multiple elements to extract clean markdown</span>
<h3>📝 Markdown Extraction</h3>
<span class="tool-tagline">Convert webpage content to clean markdown "as you see"</span>
</div>
<div class="tool-steps">
@@ -312,9 +313,9 @@
<div class="tool-features">
<div class="feature-tag">Multi-select with Ctrl/Cmd</div>
<div class="feature-tag">Visual Text Mode</div>
<div class="feature-tag">Smart formatting</div>
<div class="feature-tag">Cloud export (soon)</div>
<div class="feature-tag">Visual Text Mode (As You See)</div>
<div class="feature-tag">Clean markdown output</div>
<div class="feature-tag">Export to Crawl4AI Cloud (soon)</div>
</div>
</div>
</div>
@@ -326,26 +327,26 @@
<h2>See the Generated Code & Extracted Data</h2>
<div class="code-tabs">
<button class="code-tab active" data-example="schema">📊 Schema Builder</button>
<button class="code-tab active" data-example="schema">🎯 Click2Crawl</button>
<button class="code-tab" data-example="script">🔴 Script Builder</button>
<button class="code-tab" data-example="markdown">📝 Click2Crawl</button>
<button class="code-tab" data-example="markdown">📝 Markdown Extraction</button>
</div>
<div class="code-examples">
<!-- Schema Builder Code -->
<!-- Click2Crawl Code -->
<div class="code-example active" id="code-schema">
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px;">
<!-- Python Code -->
<div class="terminal-window">
<div class="terminal-header">
<span class="terminal-title">schema_extraction.py</span>
<span class="terminal-title">click2crawl_extraction.py</span>
<button class="copy-button" data-code="schema-python">Copy</button>
</div>
<div class="terminal-content">
<pre><code><span class="comment">#!/usr/bin/env python3</span>
<span class="comment">"""
🎉 NO LLM NEEDED! Direct extraction with CSS selectors
Generated by Crawl4AI Chrome Extension
Generated by Crawl4AI Chrome Extension - Click2Crawl
"""</span>
<span class="keyword">import</span> asyncio
@@ -353,7 +354,7 @@ Generated by Crawl4AI Chrome Extension
<span class="keyword">from</span> crawl4ai <span class="keyword">import</span> AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
<span class="keyword">from</span> crawl4ai.extraction_strategy <span class="keyword">import</span> JsonCssExtractionStrategy
<span class="comment"># The EXACT schema from your visual clicks - no guessing!</span>
<span class="comment"># The EXACT schema from Click2Crawl - no guessing!</span>
EXTRACTION_SCHEMA = {
<span class="string">"name"</span>: <span class="string">"Product Catalog"</span>,
<span class="string">"baseSelector"</span>: <span class="string">"div.product-card"</span>, <span class="comment"># The container you selected</span>
@@ -515,7 +516,7 @@ asyncio.run(automate_shopping())</code></pre>
</div>
</div>
<!-- Click2Crawl Markdown Output -->
<!-- Markdown Extraction Output -->
<div class="code-example" id="code-markdown">
<div class="terminal-window">
<div class="terminal-header">
@@ -692,20 +693,20 @@ Today, finding a 24-hour restaurant in Manhattan requires genuine effort. The pa
<div class="coming-feature">
<div class="feature-header">
<span class="feature-badge">Direct</span>
<h3>Get CrawlResult Without Code</h3>
<h3>Direct Data Download</h3>
</div>
<p>Skip the code generation entirely! Get extracted data directly in the extension as a CrawlResult object, ready to download as JSON.</p>
<p>Skip the code generation entirely! Download extracted data directly from Click2Crawl as JSON or CSV files.</p>
<div class="feature-preview">
<code>📊 One-click extraction • No Python needed • Export to JSON/CSV</code>
<code>📊 One-click download • No Python needed • Multiple export formats</code>
</div>
</div>
<div class="coming-feature">
<div class="feature-header">
<span class="feature-badge">AI</span>
<h3>Smart Schema Suggestions</h3>
<h3>Smart Field Detection</h3>
</div>
<p>AI-powered field detection that automatically suggests the most likely data fields on any page, making schema building even faster.</p>
<p>AI-powered field detection for Click2Crawl that automatically suggests the most likely data fields on any page.</p>
<div class="feature-preview">
<code>🤖 Auto-detect fields • Smart naming • Pattern recognition</code>
</div>
@@ -758,7 +759,10 @@ Today, finding a 24-hour restaurant in Manhattan requires genuine effort. The pa
// Show corresponding content
const toolId = this.getAttribute('data-tool');
document.getElementById(toolId).classList.add('active');
const contentElement = document.getElementById(toolId);
if (contentElement) {
contentElement.classList.add('active');
}
});
});

View File

@@ -1,7 +1,7 @@
{
"manifest_version": 3,
"name": "Crawl4AI Assistant",
"version": "1.2.1",
"version": "1.3.0",
"description": "Visual schema and script builder for Crawl4AI - Build extraction schemas and automation scripts by clicking and recording actions",
"permissions": [
"activeTab",
@@ -25,11 +25,12 @@
"js": [
"libs/marked.min.js",
"content/shared/utils.js",
"content/schemaBuilder.js",
"content/markdownPreviewModal.js",
"content/click2crawl.js",
"content/scriptBuilder.js",
"content/contentAnalyzer.js",
"content/markdownConverter.js",
"content/click2CrawlBuilder.js",
"content/markdownExtraction.js",
"content/content.js"
],
"css": ["content/overlay.css"],

View File

@@ -38,6 +38,8 @@ body {
font-family: var(--font-primary);
background: #0a0a0a;
color: #e0e0e0;
border-radius: 16px;
overflow: hidden;
}
.popup-container {

View File

@@ -23,15 +23,15 @@
<div class="mode-selector">
<button id="schema-mode" class="mode-button schema">
<div class="icon">📊</div>
<div class="icon">🎯</div>
<div class="mode-info">
<h3>Schema Builder</h3>
<h3>Click2Crawl</h3>
<p>Click elements to build extraction schemas</p>
</div>
</button>
<button id="script-mode" class="mode-button script">
<div class="icon">🎯</div>
<div class="icon">🔴</div>
<div class="mode-info">
<h3>Script Builder <span style="color: #ff3c74; font-size: 10px;">(Alpha)</span></h3>
<p>Record actions to build automation scripts</p>
@@ -39,9 +39,9 @@
</button>
<button id="c2c-mode" class="mode-button c2c">
<div class="icon"></div>
<div class="icon">📝</div>
<div class="mode-info">
<h3>Click2Crawl</h3>
<h3>Markdown Extraction</h3>
<p>Select elements and convert to clean markdown</p>
</div>
</button>
@@ -72,10 +72,10 @@
</div>
</div>
<div class="instructions">
<div class="instructions" style="display: none;">
<h4>How to use:</h4>
<ol>
<li>Click "Schema Builder" to start</li>
<li>Click "Click2Crawl" to start</li>
<li>Click on a container element (e.g., product card)</li>
<li>Click individual fields inside and name them</li>
<li>Generate Python code when done</li>