Refactor Crawl4AI Assistant: Rename Schema Builder to Click2Crawl, update UI elements, and remove deprecated files
- Updated overlay.css to add gap in titlebar. - Deleted schemaBuilder_v1.js and associated zip files (v1.0.0 to v1.2.0). - Modified index.html to reflect new Click2Crawl feature and updated descriptions. - Updated manifest.json to include new JavaScript files for Click2Crawl and markdown extraction. - Refined popup styles and HTML to align with new feature names and functionalities. - Enhanced user instructions and tooltips to guide users on the new Click2Crawl and Markdown Extraction features.
This commit is contained in:
@@ -20,7 +20,8 @@
|
||||
"Bash(docker logs:*)",
|
||||
"Bash(curl:*)",
|
||||
"Bash(docker compose:*)",
|
||||
"Bash(./test-final-integration.sh:*)"
|
||||
"Bash(./test-final-integration.sh:*)",
|
||||
"Bash(mv:*)"
|
||||
]
|
||||
},
|
||||
"enableAllProjectMcpServers": false
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
# Crawl4AI Chrome Extension
|
||||
|
||||
Visual schema and script builder for Crawl4AI - Build extraction schemas by clicking on webpage elements!
|
||||
Visual extraction tools for Crawl4AI - Click to extract data and content from any webpage!
|
||||
|
||||
## 🚀 Features
|
||||
|
||||
- **Visual Schema Builder**: Click on elements to build extraction schemas
|
||||
- **Click2Crawl**: Click on elements to build data extraction schemas instantly
|
||||
- **Markdown Extraction**: Select elements and export as clean markdown
|
||||
- **Script Builder (Alpha)**: Record browser actions to create automation scripts
|
||||
- **Smart Element Selection**: Container and field selection with visual feedback
|
||||
- **Code Generation**: Generates complete Python code with LLM integration
|
||||
- **Code Generation**: Generates complete Python code for Crawl4AI
|
||||
- **Beautiful Dark UI**: Consistent with Crawl4AI's design language
|
||||
- **One-Click Download**: Get your generated code instantly
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
@@ -33,11 +34,11 @@ If you want proper icons:
|
||||
|
||||
## 🎯 How to Use
|
||||
|
||||
### Building a Schema
|
||||
### Using Click2Crawl
|
||||
|
||||
1. **Navigate to any website** you want to extract data from
|
||||
2. **Click the Crawl4AI extension icon** in your toolbar
|
||||
3. **Click "Schema Builder"** to start the capture mode
|
||||
3. **Click "Click2Crawl"** to start the capture mode
|
||||
4. **Select a container element**:
|
||||
- Hover over elements (they'll highlight in blue)
|
||||
- Click on a repeating container (e.g., product card, article block)
|
||||
@@ -45,9 +46,9 @@ If you want proper icons:
|
||||
- Elements will now highlight in green
|
||||
- Click on each piece of data you want to extract
|
||||
- Name each field (e.g., "title", "price", "description")
|
||||
6. **Generate the code**:
|
||||
- Click "Generate Code" in the extension popup
|
||||
- A Python file will automatically download
|
||||
6. **Test and Export**:
|
||||
- Click "Test Schema" to see extracted data instantly
|
||||
- Export as Python code, JSON schema, or markdown
|
||||
|
||||
### Running the Generated Code
|
||||
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
// Enhanced SchemaBuilder class for Crawl4AI Chrome Extension
|
||||
// Click2Crawl class for Crawl4AI Chrome Extension
|
||||
// Click elements to build extraction schemas
|
||||
|
||||
// Singleton instance to prevent multiple toolbars
|
||||
let schemaBuilderInstance = null;
|
||||
let click2CrawlInstance = null;
|
||||
|
||||
class SchemaBuilder {
|
||||
class Click2Crawl {
|
||||
constructor() {
|
||||
// Prevent multiple instances
|
||||
if (schemaBuilderInstance) {
|
||||
schemaBuilderInstance.stop();
|
||||
if (click2CrawlInstance) {
|
||||
click2CrawlInstance.stop();
|
||||
}
|
||||
schemaBuilderInstance = this;
|
||||
click2CrawlInstance = this;
|
||||
|
||||
this.container = null;
|
||||
this.fields = [];
|
||||
@@ -57,9 +58,15 @@ class SchemaBuilder {
|
||||
this.inspectingFields = false;
|
||||
this.parentLevels = 1;
|
||||
|
||||
// Clean up markdown preview modal
|
||||
if (this.markdownPreviewModal) {
|
||||
this.markdownPreviewModal.destroy();
|
||||
this.markdownPreviewModal = null;
|
||||
}
|
||||
|
||||
// Clear singleton reference
|
||||
if (schemaBuilderInstance === this) {
|
||||
schemaBuilderInstance = null;
|
||||
if (click2CrawlInstance === this) {
|
||||
click2CrawlInstance = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,8 +104,8 @@ class SchemaBuilder {
|
||||
<button class="c4ai-dot c4ai-dot-minimize"></button>
|
||||
<button class="c4ai-dot c4ai-dot-maximize"></button>
|
||||
</div>
|
||||
<img src="${chrome.runtime.getURL('icons/icon-16.png')}" class="c4ai-titlebar-icon" alt="Crawl4AI">
|
||||
<div class="c4ai-titlebar-title">🔧 Schema Builder</div>
|
||||
<div class="c4ai-titlebar-title"> Click2Crawl</div>
|
||||
<img src="${chrome.runtime.getURL('icons/icon-16.png')}" class="c4ai-titlebar-icon" alt="Crawl4AI" style="margin-left: auto;">
|
||||
</div>
|
||||
<div class="c4ai-toolbar-content">
|
||||
<div class="c4ai-toolbar-status">
|
||||
@@ -151,6 +158,9 @@ class SchemaBuilder {
|
||||
<button id="c4ai-export-data" class="c4ai-action-btn c4ai-export-btn" disabled>
|
||||
<span>📊</span> Data
|
||||
</button>
|
||||
<button id="c4ai-export-markdown" class="c4ai-action-btn c4ai-export-btn" disabled>
|
||||
<span>📝</span> Markdown
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -202,6 +212,7 @@ class SchemaBuilder {
|
||||
addClickHandler('c4ai-test', () => this.testSchema());
|
||||
addClickHandler('c4ai-export-schema', () => this.exportSchema());
|
||||
addClickHandler('c4ai-export-data', () => this.exportData());
|
||||
addClickHandler('c4ai-export-markdown', () => this.exportMarkdown());
|
||||
addClickHandler('c4ai-deploy-cloud', () => this.deployToCloud());
|
||||
addClickHandler('c4ai-close', () => this.stop());
|
||||
|
||||
@@ -273,11 +284,16 @@ class SchemaBuilder {
|
||||
handleClick(e) {
|
||||
const element = e.target;
|
||||
|
||||
// Check if clicking on our UI elements
|
||||
// Check if clicking on our UI elements (including markdown preview modal)
|
||||
if (this.isOurElement(element)) {
|
||||
return; // Let toolbar clicks work normally
|
||||
}
|
||||
|
||||
// Additional check for markdown preview modal classes
|
||||
if (element.closest('.c4ai-c2c-preview') || element.closest('.c4ai-preview-options')) {
|
||||
return; // Don't interfere with markdown preview modal
|
||||
}
|
||||
|
||||
// Use current element
|
||||
const targetElement = this.currentElement || element;
|
||||
|
||||
@@ -303,7 +319,9 @@ class SchemaBuilder {
|
||||
|
||||
isOurElement(element) {
|
||||
return window.C4AI_Utils.isOurElement(element) ||
|
||||
(this.selectedBox && element === this.selectedBox);
|
||||
(this.selectedBox && element === this.selectedBox) ||
|
||||
(this.markdownPreviewModal && this.markdownPreviewModal.modal &&
|
||||
(element === this.markdownPreviewModal.modal || this.markdownPreviewModal.modal.contains(element)));
|
||||
}
|
||||
|
||||
showSelectedBox(element) {
|
||||
@@ -499,6 +517,9 @@ class SchemaBuilder {
|
||||
}
|
||||
|
||||
showFieldDialog(element) {
|
||||
// Remove any existing field dialogs first
|
||||
document.querySelectorAll('.c4ai-field-dialog').forEach(d => d.remove());
|
||||
|
||||
const dialog = document.createElement('div');
|
||||
dialog.className = 'c4ai-field-dialog';
|
||||
|
||||
@@ -922,6 +943,7 @@ class SchemaBuilder {
|
||||
document.getElementById('c4ai-test').disabled = false;
|
||||
document.getElementById('c4ai-export-schema').disabled = false;
|
||||
document.getElementById('c4ai-export-data').disabled = false;
|
||||
document.getElementById('c4ai-export-markdown').disabled = false;
|
||||
document.getElementById('c4ai-deploy-cloud').disabled = false;
|
||||
} else {
|
||||
schemaSection.style.display = 'none';
|
||||
@@ -976,6 +998,9 @@ class SchemaBuilder {
|
||||
const field = this.fields[index];
|
||||
if (!field) return;
|
||||
|
||||
// Remove any existing field dialogs first
|
||||
document.querySelectorAll('.c4ai-field-dialog').forEach(d => d.remove());
|
||||
|
||||
// Re-show the field dialog with existing values
|
||||
const dialog = document.createElement('div');
|
||||
dialog.className = 'c4ai-field-dialog';
|
||||
@@ -1476,6 +1501,137 @@ class SchemaBuilder {
|
||||
await this.testSchema();
|
||||
}
|
||||
|
||||
async exportMarkdown() {
|
||||
// Initialize markdown converter if not already done
|
||||
if (!this.markdownConverter) {
|
||||
this.markdownConverter = new MarkdownConverter();
|
||||
}
|
||||
if (!this.contentAnalyzer) {
|
||||
this.contentAnalyzer = new ContentAnalyzer();
|
||||
}
|
||||
|
||||
// Initialize markdown preview modal if not already done
|
||||
if (!this.markdownPreviewModal) {
|
||||
this.markdownPreviewModal = new MarkdownPreviewModal();
|
||||
}
|
||||
|
||||
// Get all matching containers
|
||||
const containers = document.querySelectorAll(this.container.selector);
|
||||
if (containers.length === 0) {
|
||||
this.showNotification('No matching containers found', 'error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Show modal with callback to generate markdown
|
||||
this.markdownPreviewModal.show(async (options) => {
|
||||
return await this.generateMarkdownFromSchema(options);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async generateMarkdownFromSchema(options) {
|
||||
// Get all matching containers
|
||||
const containers = document.querySelectorAll(this.container.selector);
|
||||
const markdownParts = [];
|
||||
|
||||
for (let i = 0; i < containers.length; i++) {
|
||||
const container = containers[i];
|
||||
|
||||
// Add XPath header if enabled
|
||||
if (options.includeXPath) {
|
||||
const xpath = this.getXPath(container);
|
||||
markdownParts.push(`### Container ${i + 1} - XPath: \`${xpath}\`\n`);
|
||||
}
|
||||
|
||||
// Extract data based on schema fields
|
||||
const extractedData = {};
|
||||
this.fields.forEach(field => {
|
||||
try {
|
||||
const element = container.querySelector(field.selector);
|
||||
if (element) {
|
||||
if (field.type === 'text') {
|
||||
extractedData[field.name] = element.textContent.trim();
|
||||
} else if (field.type === 'attribute' && field.attribute) {
|
||||
extractedData[field.name] = element.getAttribute(field.attribute);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
// Skip invalid selectors
|
||||
}
|
||||
});
|
||||
|
||||
// Convert container to markdown based on options
|
||||
const analysis = await this.contentAnalyzer.analyze([container]);
|
||||
const containerMarkdown = await this.markdownConverter.convert([container], {
|
||||
...options,
|
||||
analysis,
|
||||
extractedData // Pass extracted data for context
|
||||
});
|
||||
|
||||
// Trim the markdown before adding
|
||||
const trimmedMarkdown = containerMarkdown.trim();
|
||||
markdownParts.push(trimmedMarkdown);
|
||||
|
||||
// Add separator if enabled and not last element
|
||||
if (options.addSeparators && i < containers.length - 1) {
|
||||
markdownParts.push('\n---\n');
|
||||
}
|
||||
}
|
||||
|
||||
return markdownParts.join('\n');
|
||||
}
|
||||
|
||||
getXPath(element) {
|
||||
if (element.id) {
|
||||
return `//*[@id="${element.id}"]`;
|
||||
}
|
||||
|
||||
const parts = [];
|
||||
let current = element;
|
||||
|
||||
while (current && current.nodeType === Node.ELEMENT_NODE) {
|
||||
let index = 0;
|
||||
let sibling = current.previousSibling;
|
||||
|
||||
while (sibling) {
|
||||
if (sibling.nodeType === Node.ELEMENT_NODE && sibling.nodeName === current.nodeName) {
|
||||
index++;
|
||||
}
|
||||
sibling = sibling.previousSibling;
|
||||
}
|
||||
|
||||
const tagName = current.nodeName.toLowerCase();
|
||||
const part = index > 0 ? `${tagName}[${index + 1}]` : tagName;
|
||||
parts.unshift(part);
|
||||
|
||||
current = current.parentNode;
|
||||
}
|
||||
|
||||
return '/' + parts.join('/');
|
||||
}
|
||||
|
||||
|
||||
|
||||
showNotification(message, type = 'success') {
|
||||
const notification = document.createElement('div');
|
||||
notification.className = `c4ai-notification c4ai-notification-${type}`;
|
||||
notification.textContent = message;
|
||||
|
||||
document.body.appendChild(notification);
|
||||
|
||||
// Animate in
|
||||
setTimeout(() => notification.classList.add('show'), 10);
|
||||
|
||||
// Remove after 3 seconds
|
||||
setTimeout(() => {
|
||||
notification.classList.remove('show');
|
||||
setTimeout(() => notification.remove(), 300);
|
||||
}, 3000);
|
||||
}
|
||||
|
||||
deployToCloud() {
|
||||
// Create cloud deployment modal
|
||||
const modal = document.createElement('div');
|
||||
@@ -1808,5 +1964,5 @@ if __name__ == "__main__":
|
||||
|
||||
// Export for use in content script
|
||||
if (typeof window !== 'undefined') {
|
||||
window.SchemaBuilder = SchemaBuilder;
|
||||
window.Click2Crawl = Click2Crawl;
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
// Main content script for Crawl4AI Assistant
|
||||
// Coordinates between SchemaBuilder and ScriptBuilder
|
||||
// Coordinates between Click2Crawl, ScriptBuilder, and MarkdownExtraction
|
||||
|
||||
let activeBuilder = null;
|
||||
|
||||
@@ -13,8 +13,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
|
||||
}
|
||||
|
||||
if (request.mode === 'schema') {
|
||||
console.log('Starting Schema Builder');
|
||||
activeBuilder = new SchemaBuilder();
|
||||
console.log('Starting Click2Crawl');
|
||||
activeBuilder = new Click2Crawl();
|
||||
activeBuilder.start();
|
||||
} else if (request.mode === 'script') {
|
||||
console.log('Starting Script Builder');
|
||||
@@ -34,8 +34,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
|
||||
activeBuilder.deactivate?.();
|
||||
activeBuilder = null;
|
||||
}
|
||||
console.log('Starting Schema Builder');
|
||||
activeBuilder = new SchemaBuilder();
|
||||
console.log('Starting Click2Crawl');
|
||||
activeBuilder = new Click2Crawl();
|
||||
activeBuilder.start();
|
||||
sendResponse({ success: true });
|
||||
} else if (request.action === 'startScriptCapture') {
|
||||
@@ -52,8 +52,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
|
||||
activeBuilder.deactivate?.();
|
||||
activeBuilder = null;
|
||||
}
|
||||
console.log('Starting Click2Crawl');
|
||||
activeBuilder = new Click2CrawlBuilder();
|
||||
console.log('Starting Markdown Extraction');
|
||||
activeBuilder = new MarkdownExtraction();
|
||||
sendResponse({ success: true });
|
||||
} else if (request.action === 'generateCode') {
|
||||
if (activeBuilder && activeBuilder.generateCode) {
|
||||
|
||||
@@ -1,26 +1,14 @@
|
||||
class Click2CrawlBuilder {
|
||||
class MarkdownExtraction {
|
||||
constructor() {
|
||||
this.selectedElements = new Set();
|
||||
this.highlightBoxes = new Map();
|
||||
this.selectionMode = false;
|
||||
this.toolbar = null;
|
||||
this.previewPanel = null;
|
||||
this.markdownPreviewModal = null;
|
||||
this.selectionCounter = 0;
|
||||
this.markdownConverter = null;
|
||||
this.contentAnalyzer = null;
|
||||
|
||||
// Configuration options
|
||||
this.options = {
|
||||
includeImages: true,
|
||||
preserveTables: true,
|
||||
keepCodeFormatting: true,
|
||||
simplifyLayout: false,
|
||||
preserveLinks: true,
|
||||
addSeparators: true,
|
||||
includeXPath: false,
|
||||
textOnly: false
|
||||
};
|
||||
|
||||
this.init();
|
||||
}
|
||||
|
||||
@@ -44,7 +32,7 @@ class Click2CrawlBuilder {
|
||||
<span class="c4ai-dot c4ai-dot-yellow"></span>
|
||||
<span class="c4ai-dot c4ai-dot-green"></span>
|
||||
</div>
|
||||
<span class="c4ai-toolbar-title">Click2Crawl</span>
|
||||
<span class="c4ai-toolbar-title">Markdown Extraction</span>
|
||||
<button class="c4ai-close-btn" title="Close">×</button>
|
||||
</div>
|
||||
<div class="c4ai-toolbar-content">
|
||||
@@ -363,19 +351,18 @@ class Click2CrawlBuilder {
|
||||
}
|
||||
|
||||
async showPreview() {
|
||||
// Generate markdown from selected elements
|
||||
const markdown = await this.generateMarkdown();
|
||||
|
||||
// Create or update preview panel
|
||||
if (!this.previewPanel) {
|
||||
this.createPreviewPanel();
|
||||
// Initialize markdown preview modal if not already done
|
||||
if (!this.markdownPreviewModal) {
|
||||
this.markdownPreviewModal = new MarkdownPreviewModal();
|
||||
}
|
||||
|
||||
await this.updatePreviewContent(markdown);
|
||||
this.previewPanel.style.display = 'block';
|
||||
// Show modal with callback to generate markdown
|
||||
this.markdownPreviewModal.show(async (options) => {
|
||||
return await this.generateMarkdown(options);
|
||||
});
|
||||
}
|
||||
|
||||
createPreviewPanel() {
|
||||
/* createPreviewPanel() {
|
||||
this.previewPanel = document.createElement('div');
|
||||
this.previewPanel.className = 'c4ai-c2c-preview';
|
||||
this.previewPanel.innerHTML = `
|
||||
@@ -425,9 +412,9 @@ class Click2CrawlBuilder {
|
||||
this.previewPanel.style.zIndex = '999999';
|
||||
|
||||
this.setupPreviewEventListeners();
|
||||
}
|
||||
} */
|
||||
|
||||
setupPreviewEventListeners() {
|
||||
/* setupPreviewEventListeners() {
|
||||
// Close button
|
||||
this.previewPanel.querySelector('.c4ai-preview-close').addEventListener('click', () => {
|
||||
this.previewPanel.style.display = 'none';
|
||||
@@ -496,9 +483,9 @@ class Click2CrawlBuilder {
|
||||
this.previewPanel.querySelector('.c4ai-download-btn').addEventListener('click', () => {
|
||||
this.downloadMarkdown();
|
||||
});
|
||||
}
|
||||
} */
|
||||
|
||||
switchPreviewTab(tabName) {
|
||||
/* switchPreviewTab(tabName) {
|
||||
// Update active tab
|
||||
this.previewPanel.querySelectorAll('.c4ai-tab').forEach(tab => {
|
||||
tab.classList.toggle('active', tab.dataset.tab === tabName);
|
||||
@@ -508,9 +495,9 @@ class Click2CrawlBuilder {
|
||||
this.previewPanel.querySelectorAll('.c4ai-preview-pane').forEach(pane => {
|
||||
pane.classList.toggle('active', pane.dataset.pane === tabName);
|
||||
});
|
||||
}
|
||||
} */
|
||||
|
||||
async updatePreviewContent(markdown) {
|
||||
/* async updatePreviewContent(markdown) {
|
||||
// Update markdown pane
|
||||
const markdownPane = this.previewPanel.querySelector('[data-pane="markdown"]');
|
||||
markdownPane.innerHTML = `<pre><code>${this.escapeHtml(markdown)}</code></pre>`;
|
||||
@@ -535,19 +522,19 @@ class Click2CrawlBuilder {
|
||||
// Fallback if marked.js is not available
|
||||
previewPane.innerHTML = `<div class="c4ai-markdown-preview"><pre>${this.escapeHtml(markdown)}</pre></div>`;
|
||||
}
|
||||
}
|
||||
} */
|
||||
|
||||
|
||||
escapeHtml(unsafe) {
|
||||
/* escapeHtml(unsafe) {
|
||||
return unsafe
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/"/g, """)
|
||||
.replace(/'/g, "'");
|
||||
}
|
||||
} */
|
||||
|
||||
async generateMarkdown() {
|
||||
async generateMarkdown(options) {
|
||||
// Get selected elements as array
|
||||
const elements = Array.from(this.selectedElements);
|
||||
|
||||
@@ -565,7 +552,7 @@ class Click2CrawlBuilder {
|
||||
const element = sortedElements[i];
|
||||
|
||||
// Add XPath header if enabled
|
||||
if (this.options.includeXPath) {
|
||||
if (options.includeXPath) {
|
||||
const xpath = this.getXPath(element);
|
||||
markdownParts.push(`### Element ${i + 1} - XPath: \`${xpath}\`\n`);
|
||||
}
|
||||
@@ -574,7 +561,7 @@ class Click2CrawlBuilder {
|
||||
let elementsToConvert = [element];
|
||||
|
||||
// If text-only mode and element is a TR, process the entire table for better context
|
||||
if (this.options.textOnly && element.tagName === 'TR') {
|
||||
if (options.textOnly && element.tagName === 'TR') {
|
||||
const table = element.closest('table');
|
||||
if (table && !sortedElements.includes(table)) {
|
||||
// Only include this table row, not the whole table
|
||||
@@ -585,19 +572,21 @@ class Click2CrawlBuilder {
|
||||
// Analyze and convert individual element
|
||||
const analysis = await this.contentAnalyzer.analyze(elementsToConvert);
|
||||
const markdown = await this.markdownConverter.convert(elementsToConvert, {
|
||||
...this.options,
|
||||
...options,
|
||||
analysis
|
||||
});
|
||||
|
||||
markdownParts.push(markdown.trim());
|
||||
// Trim the markdown before adding
|
||||
const trimmedMarkdown = markdown.trim();
|
||||
markdownParts.push(trimmedMarkdown);
|
||||
|
||||
// Add separator if enabled and not last element
|
||||
if (this.options.addSeparators && i < sortedElements.length - 1) {
|
||||
markdownParts.push('\n\n---\n\n');
|
||||
if (options.addSeparators && i < sortedElements.length - 1) {
|
||||
markdownParts.push('\n---\n');
|
||||
}
|
||||
}
|
||||
|
||||
return markdownParts.join('\n\n');
|
||||
return markdownParts.join('\n');
|
||||
}
|
||||
|
||||
getXPath(element) {
|
||||
@@ -642,35 +631,15 @@ class Click2CrawlBuilder {
|
||||
}
|
||||
|
||||
async copyToClipboard() {
|
||||
const markdown = await this.generateMarkdown();
|
||||
|
||||
try {
|
||||
await navigator.clipboard.writeText(markdown);
|
||||
this.showNotification('Markdown copied to clipboard!');
|
||||
} catch (err) {
|
||||
console.error('Failed to copy:', err);
|
||||
this.showNotification('Failed to copy. Please try again.', 'error');
|
||||
if (this.markdownPreviewModal) {
|
||||
await this.markdownPreviewModal.copyToClipboard();
|
||||
}
|
||||
}
|
||||
|
||||
async downloadMarkdown() {
|
||||
const markdown = await this.generateMarkdown();
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
|
||||
const filename = `crawl4ai-export-${timestamp}.md`;
|
||||
|
||||
// Create blob and download
|
||||
const blob = new Blob([markdown], { type: 'text/markdown' });
|
||||
const url = URL.createObjectURL(blob);
|
||||
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = filename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
document.body.removeChild(a);
|
||||
URL.revokeObjectURL(url);
|
||||
|
||||
this.showNotification(`Downloaded ${filename}`);
|
||||
if (this.markdownPreviewModal) {
|
||||
await this.markdownPreviewModal.downloadMarkdown();
|
||||
}
|
||||
}
|
||||
|
||||
showNotification(message, type = 'success') {
|
||||
@@ -707,9 +676,9 @@ class Click2CrawlBuilder {
|
||||
this.toolbar = null;
|
||||
}
|
||||
|
||||
if (this.previewPanel) {
|
||||
this.previewPanel.remove();
|
||||
this.previewPanel = null;
|
||||
if (this.markdownPreviewModal) {
|
||||
this.markdownPreviewModal.destroy();
|
||||
this.markdownPreviewModal = null;
|
||||
}
|
||||
|
||||
// Remove hover styles
|
||||
@@ -726,7 +695,7 @@ class Click2CrawlBuilder {
|
||||
}
|
||||
} catch (error) {
|
||||
// Extension context might be invalidated, ignore the error
|
||||
console.log('Click2Crawl deactivated (extension context unavailable)');
|
||||
console.log('Markdown Extraction deactivated (extension context unavailable)');
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,300 @@
|
||||
// Shared Markdown Preview Modal Component for Crawl4AI Assistant
|
||||
// Used by both Click2Crawl and MarkdownExtraction
|
||||
|
||||
class MarkdownPreviewModal {
|
||||
constructor(options = {}) {
|
||||
this.modal = null;
|
||||
this.markdownOptions = {
|
||||
includeImages: true,
|
||||
preserveTables: true,
|
||||
keepCodeFormatting: true,
|
||||
simplifyLayout: false,
|
||||
preserveLinks: true,
|
||||
addSeparators: true,
|
||||
includeXPath: false,
|
||||
textOnly: false,
|
||||
...options
|
||||
};
|
||||
this.onGenerateMarkdown = null;
|
||||
this.currentMarkdown = '';
|
||||
}
|
||||
|
||||
show(generateMarkdownCallback) {
|
||||
this.onGenerateMarkdown = generateMarkdownCallback;
|
||||
|
||||
if (!this.modal) {
|
||||
this.createModal();
|
||||
}
|
||||
|
||||
// Generate initial markdown
|
||||
this.updateContent();
|
||||
this.modal.style.display = 'block';
|
||||
}
|
||||
|
||||
hide() {
|
||||
if (this.modal) {
|
||||
this.modal.style.display = 'none';
|
||||
}
|
||||
}
|
||||
|
||||
createModal() {
|
||||
this.modal = document.createElement('div');
|
||||
this.modal.className = 'c4ai-c2c-preview';
|
||||
this.modal.innerHTML = `
|
||||
<div class="c4ai-preview-header">
|
||||
<div class="c4ai-toolbar-dots">
|
||||
<span class="c4ai-dot c4ai-dot-red"></span>
|
||||
<span class="c4ai-dot c4ai-dot-yellow"></span>
|
||||
<span class="c4ai-dot c4ai-dot-green"></span>
|
||||
</div>
|
||||
<span class="c4ai-preview-title">Markdown Preview</span>
|
||||
<button class="c4ai-preview-close">×</button>
|
||||
</div>
|
||||
<div class="c4ai-preview-options">
|
||||
<label><input type="checkbox" name="textOnly"> 👁️ Visual Text Mode (As You See)</label>
|
||||
<label><input type="checkbox" name="includeImages" checked> Include Images</label>
|
||||
<label><input type="checkbox" name="preserveTables" checked> Preserve Tables</label>
|
||||
<label><input type="checkbox" name="preserveLinks" checked> Preserve Links</label>
|
||||
<label><input type="checkbox" name="keepCodeFormatting" checked> Keep Code Formatting</label>
|
||||
<label><input type="checkbox" name="simplifyLayout"> Simplify Layout</label>
|
||||
<label><input type="checkbox" name="addSeparators" checked> Add Separators</label>
|
||||
<label><input type="checkbox" name="includeXPath"> Include XPath Headers</label>
|
||||
</div>
|
||||
<div class="c4ai-preview-content">
|
||||
<div class="c4ai-preview-tabs">
|
||||
<button class="c4ai-tab active" data-tab="preview">Preview</button>
|
||||
<button class="c4ai-tab" data-tab="markdown">Markdown</button>
|
||||
<button class="c4ai-wrap-toggle" title="Toggle word wrap">↔️ Wrap</button>
|
||||
</div>
|
||||
<div class="c4ai-preview-pane active" data-pane="preview"></div>
|
||||
<div class="c4ai-preview-pane" data-pane="markdown"></div>
|
||||
</div>
|
||||
<div class="c4ai-preview-actions">
|
||||
<button class="c4ai-download-btn">Download .md</button>
|
||||
<button class="c4ai-copy-markdown-btn">Copy Markdown</button>
|
||||
<button class="c4ai-cloud-btn" disabled>Send to Cloud (Coming Soon)</button>
|
||||
</div>
|
||||
`;
|
||||
|
||||
document.body.appendChild(this.modal);
|
||||
|
||||
// Make modal draggable
|
||||
if (window.C4AI_Utils && window.C4AI_Utils.makeDraggable) {
|
||||
window.C4AI_Utils.makeDraggable(this.modal);
|
||||
}
|
||||
|
||||
// Position preview modal
|
||||
this.modal.style.position = 'fixed';
|
||||
this.modal.style.top = '50%';
|
||||
this.modal.style.left = '50%';
|
||||
this.modal.style.transform = 'translate(-50%, -50%)';
|
||||
this.modal.style.zIndex = '999999';
|
||||
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
||||
setupEventListeners() {
|
||||
// Close button
|
||||
this.modal.querySelector('.c4ai-preview-close').addEventListener('click', () => {
|
||||
this.hide();
|
||||
});
|
||||
|
||||
// Tab switching
|
||||
this.modal.querySelectorAll('.c4ai-tab').forEach(tab => {
|
||||
tab.addEventListener('click', (e) => {
|
||||
const tabName = e.target.dataset.tab;
|
||||
this.switchTab(tabName);
|
||||
});
|
||||
});
|
||||
|
||||
// Wrap toggle
|
||||
const wrapToggle = this.modal.querySelector('.c4ai-wrap-toggle');
|
||||
wrapToggle.addEventListener('click', () => {
|
||||
const panes = this.modal.querySelectorAll('.c4ai-preview-pane');
|
||||
panes.forEach(pane => {
|
||||
pane.classList.toggle('wrap');
|
||||
});
|
||||
wrapToggle.classList.toggle('active');
|
||||
});
|
||||
|
||||
// Options change
|
||||
this.modal.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
|
||||
checkbox.addEventListener('change', async (e) => {
|
||||
this.markdownOptions[e.target.name] = e.target.checked;
|
||||
|
||||
// Handle text-only mode dependencies
|
||||
if (e.target.name === 'textOnly' && e.target.checked) {
|
||||
const preserveLinksCheckbox = this.modal.querySelector('input[name="preserveLinks"]');
|
||||
if (preserveLinksCheckbox) {
|
||||
preserveLinksCheckbox.checked = false;
|
||||
preserveLinksCheckbox.disabled = true;
|
||||
this.markdownOptions.preserveLinks = false;
|
||||
}
|
||||
|
||||
const includeImagesCheckbox = this.modal.querySelector('input[name="includeImages"]');
|
||||
if (includeImagesCheckbox) {
|
||||
includeImagesCheckbox.disabled = true;
|
||||
}
|
||||
} else if (e.target.name === 'textOnly' && !e.target.checked) {
|
||||
// Re-enable options when text-only is disabled
|
||||
const preserveLinksCheckbox = this.modal.querySelector('input[name="preserveLinks"]');
|
||||
if (preserveLinksCheckbox) {
|
||||
preserveLinksCheckbox.disabled = false;
|
||||
}
|
||||
|
||||
const includeImagesCheckbox = this.modal.querySelector('input[name="includeImages"]');
|
||||
if (includeImagesCheckbox) {
|
||||
includeImagesCheckbox.disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Update markdown content
|
||||
await this.updateContent();
|
||||
});
|
||||
});
|
||||
|
||||
// Action buttons
|
||||
this.modal.querySelector('.c4ai-copy-markdown-btn').addEventListener('click', () => {
|
||||
this.copyToClipboard();
|
||||
});
|
||||
|
||||
this.modal.querySelector('.c4ai-download-btn').addEventListener('click', () => {
|
||||
this.downloadMarkdown();
|
||||
});
|
||||
}
|
||||
|
||||
switchTab(tabName) {
|
||||
// Update active tab
|
||||
this.modal.querySelectorAll('.c4ai-tab').forEach(tab => {
|
||||
tab.classList.toggle('active', tab.dataset.tab === tabName);
|
||||
});
|
||||
|
||||
// Update active pane
|
||||
this.modal.querySelectorAll('.c4ai-preview-pane').forEach(pane => {
|
||||
pane.classList.toggle('active', pane.dataset.pane === tabName);
|
||||
});
|
||||
}
|
||||
|
||||
async updateContent() {
|
||||
if (!this.onGenerateMarkdown) return;
|
||||
|
||||
try {
|
||||
// Generate markdown with current options
|
||||
this.currentMarkdown = await this.onGenerateMarkdown(this.markdownOptions);
|
||||
|
||||
// Update markdown pane
|
||||
const markdownPane = this.modal.querySelector('[data-pane="markdown"]');
|
||||
markdownPane.innerHTML = `<pre><code>${this.escapeHtml(this.currentMarkdown)}</code></pre>`;
|
||||
|
||||
// Update preview pane
|
||||
const previewPane = this.modal.querySelector('[data-pane="preview"]');
|
||||
|
||||
// Use marked.js if available
|
||||
if (window.marked) {
|
||||
marked.setOptions({
|
||||
gfm: true,
|
||||
breaks: true,
|
||||
tables: true,
|
||||
headerIds: false,
|
||||
mangle: false
|
||||
});
|
||||
|
||||
const html = marked.parse(this.currentMarkdown);
|
||||
previewPane.innerHTML = `<div class="c4ai-markdown-preview">${html}</div>`;
|
||||
} else {
|
||||
// Fallback
|
||||
previewPane.innerHTML = `<div class="c4ai-markdown-preview"><pre>${this.escapeHtml(this.currentMarkdown)}</pre></div>`;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error generating markdown:', error);
|
||||
this.showNotification('Error generating markdown', 'error');
|
||||
}
|
||||
}
|
||||
|
||||
async copyToClipboard() {
|
||||
try {
|
||||
await navigator.clipboard.writeText(this.currentMarkdown);
|
||||
this.showNotification('Markdown copied to clipboard!');
|
||||
} catch (err) {
|
||||
console.error('Failed to copy:', err);
|
||||
this.showNotification('Failed to copy. Please try again.', 'error');
|
||||
}
|
||||
}
|
||||
|
||||
async downloadMarkdown() {
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
|
||||
const filename = `crawl4ai-export-${timestamp}.md`;
|
||||
|
||||
// Create blob and download
|
||||
const blob = new Blob([this.currentMarkdown], { type: 'text/markdown' });
|
||||
const url = URL.createObjectURL(blob);
|
||||
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = filename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
document.body.removeChild(a);
|
||||
URL.revokeObjectURL(url);
|
||||
|
||||
this.showNotification(`Downloaded ${filename}`);
|
||||
}
|
||||
|
||||
showNotification(message, type = 'success') {
|
||||
const notification = document.createElement('div');
|
||||
notification.className = `c4ai-notification c4ai-notification-${type}`;
|
||||
notification.textContent = message;
|
||||
|
||||
document.body.appendChild(notification);
|
||||
|
||||
// Animate in
|
||||
setTimeout(() => notification.classList.add('show'), 10);
|
||||
|
||||
// Remove after 3 seconds
|
||||
setTimeout(() => {
|
||||
notification.classList.remove('show');
|
||||
setTimeout(() => notification.remove(), 300);
|
||||
}, 3000);
|
||||
}
|
||||
|
||||
escapeHtml(unsafe) {
|
||||
return unsafe
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/"/g, """)
|
||||
.replace(/'/g, "'");
|
||||
}
|
||||
|
||||
// Get current options
|
||||
getOptions() {
|
||||
return { ...this.markdownOptions };
|
||||
}
|
||||
|
||||
// Update options programmatically
|
||||
setOptions(options) {
|
||||
this.markdownOptions = { ...this.markdownOptions, ...options };
|
||||
|
||||
// Update checkboxes to reflect new options
|
||||
Object.entries(options).forEach(([key, value]) => {
|
||||
const checkbox = this.modal?.querySelector(`input[name="${key}"]`);
|
||||
if (checkbox && typeof value === 'boolean') {
|
||||
checkbox.checked = value;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
destroy() {
|
||||
if (this.modal) {
|
||||
this.modal.remove();
|
||||
this.modal = null;
|
||||
}
|
||||
this.onGenerateMarkdown = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Publish the class on `window` (when one exists) so other scripts can reach it.
if (typeof window !== 'undefined') {
  Object.assign(window, { MarkdownPreviewModal });
}
|
||||
@@ -127,6 +127,7 @@
|
||||
|
||||
/* macOS-style titlebar */
|
||||
.c4ai-toolbar-titlebar {
|
||||
gap: 1em;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
padding: 12px;
|
||||
|
||||
@@ -1,608 +0,0 @@
|
||||
/**
 * SchemaBuilder class for the Crawl4AI Chrome Extension.
 *
 * Drives an interactive picking session on the current page: the user first
 * clicks a container element, then clicks fields inside it and names them.
 * From that selection the class generates a Python script that asks Crawl4AI
 * to build a JSON/CSS extraction schema.
 *
 * Relies on globals supplied elsewhere in the extension: `chrome.runtime`
 * and `window.C4AI_Utils` (`makeDraggable`, `isOurElement`, `escapeHtml`,
 * `applySyntaxHighlighting`).
 */
class SchemaBuilder {
  /** Sets up idle state and binds the DOM handlers so they can be added and removed by reference. */
  constructor() {
    this.mode = null; // null | 'container' | 'field'
    this.container = null; // { element, html, selector, tagName } once chosen
    this.fields = []; // [{ name, value, element, selector }]
    this.overlay = null;
    this.toolbar = null;
    this.highlightBox = null;
    this.selectedElements = new Set();
    this.isPaused = false;
    this.codeModal = null;

    this.handleMouseMove = this.handleMouseMove.bind(this);
    this.handleClick = this.handleClick.bind(this);
    this.handleKeyPress = this.handleKeyPress.bind(this);
  }

  /** Starts a picking session in container-selection mode. */
  start() {
    this.mode = 'container';
    this.createOverlay();
    this.createToolbar();
    this.attachEventListeners();
    this.updateToolbar();
  }

  /**
   * Ends the picking session and restores the page.
   *
   * Removes the listeners, the toolbar, the highlight box, any open
   * field-naming dialog and all selection highlights, then resets state
   * (including the paused flag) so a later `start()` begins cleanly. An open
   * code modal is left alone so already generated code stays available.
   */
  stop() {
    this.detachEventListeners();
    this.overlay?.remove();
    this.toolbar?.remove();
    this.highlightBox?.remove();

    // A dialog left open would try to save into a session that no longer exists.
    document.querySelectorAll('.c4ai-field-dialog').forEach((dialog) => dialog.remove());
    this.removeAllHighlights();

    this.overlay = null;
    this.toolbar = null;
    this.highlightBox = null;
    this.mode = null;
    this.container = null;
    this.fields = [];
    this.selectedElements.clear();
    // Without this a restarted session would silently ignore every click.
    this.isPaused = false;
  }

  /** Creates the box used to outline the element under the pointer. */
  createOverlay() {
    this.highlightBox = document.createElement('div');
    this.highlightBox.className = 'c4ai-highlight-box';
    document.body.appendChild(this.highlightBox);
  }

  /** Builds the floating toolbar, wires its buttons and makes it draggable. */
  createToolbar() {
    this.toolbar = document.createElement('div');
    this.toolbar.className = 'c4ai-toolbar';
    this.toolbar.innerHTML = `
      <div class="c4ai-toolbar-titlebar">
        <div class="c4ai-titlebar-dots">
          <button class="c4ai-dot c4ai-dot-close" id="c4ai-close"></button>
          <button class="c4ai-dot c4ai-dot-minimize"></button>
          <button class="c4ai-dot c4ai-dot-maximize"></button>
        </div>
        <img src="${chrome.runtime.getURL('icons/icon-16.png')}" class="c4ai-titlebar-icon" alt="Crawl4AI">
        <div class="c4ai-titlebar-title">Crawl4AI Schema Builder</div>
      </div>
      <div class="c4ai-toolbar-content">
        <div class="c4ai-toolbar-status">
          <div class="c4ai-status-item">
            <span class="c4ai-status-label">Mode:</span>
            <span class="c4ai-status-value" id="c4ai-mode">Select Container</span>
          </div>
          <div class="c4ai-status-item">
            <span class="c4ai-status-label">Container:</span>
            <span class="c4ai-status-value" id="c4ai-container">Not selected</span>
          </div>
        </div>
        <div class="c4ai-fields-list" id="c4ai-fields-list" style="display: none;">
          <div class="c4ai-fields-header">Selected Fields:</div>
          <ul class="c4ai-fields-items" id="c4ai-fields-items"></ul>
        </div>
        <div class="c4ai-toolbar-hint" id="c4ai-hint">
          Click on a container element (e.g., product card, article, etc.)
        </div>
        <div class="c4ai-toolbar-actions">
          <button id="c4ai-pause" class="c4ai-action-btn c4ai-pause-btn">
            <span class="c4ai-pause-icon">⏸</span> Pause
          </button>
          <button id="c4ai-generate" class="c4ai-action-btn c4ai-generate-btn">
            <span class="c4ai-generate-icon">⚡</span> Generate Code
          </button>
        </div>
      </div>
    `;
    document.body.appendChild(this.toolbar);

    // Toolbar buttons
    document.getElementById('c4ai-pause').addEventListener('click', () => this.togglePause());
    document.getElementById('c4ai-generate').addEventListener('click', () => this.stopAndGenerate());
    document.getElementById('c4ai-close').addEventListener('click', () => this.stop());

    window.C4AI_Utils.makeDraggable(this.toolbar);
  }

  /** Registers the capture-phase document listeners used while picking. */
  attachEventListeners() {
    document.addEventListener('mousemove', this.handleMouseMove, true);
    document.addEventListener('click', this.handleClick, true);
    document.addEventListener('keydown', this.handleKeyPress, true);
  }

  /** Removes the listeners added by `attachEventListeners`. */
  detachEventListeners() {
    document.removeEventListener('mousemove', this.handleMouseMove, true);
    document.removeEventListener('click', this.handleClick, true);
    document.removeEventListener('keydown', this.handleKeyPress, true);
  }

  /**
   * Outlines the page element under the pointer, unless paused.
   * @param {MouseEvent} e
   */
  handleMouseMove(e) {
    if (this.isPaused) return;

    const element = document.elementFromPoint(e.clientX, e.clientY);
    if (element && !this.isOurElement(element)) {
      this.highlightElement(element);
    }
  }

  /**
   * Turns a page click into a container or field selection. The click is
   * cancelled so the page does not react to it; clicks on the extension's
   * own UI pass through untouched.
   * @param {MouseEvent} e
   */
  handleClick(e) {
    if (this.isPaused) return;

    const element = e.target;
    if (this.isOurElement(element)) {
      return;
    }

    e.preventDefault();
    e.stopPropagation();

    if (this.mode === 'container') {
      this.selectContainer(element);
    } else if (this.mode === 'field') {
      this.selectField(element);
    }
  }

  /**
   * Ends the session when Escape is pressed. Escape typed inside the
   * field-naming dialog is left to that dialog, where it only cancels the
   * dialog.
   * @param {KeyboardEvent} e
   */
  handleKeyPress(e) {
    if (e.key !== 'Escape') return;
    if (e.target?.closest?.('.c4ai-field-dialog')) return;
    this.stop();
  }

  /**
   * @param {Element} element
   * @returns {boolean} Result of `window.C4AI_Utils.isOurElement(element)`.
   */
  isOurElement(element) {
    return window.C4AI_Utils.isOurElement(element);
  }

  /** Toggles the paused state and updates the pause button to match. */
  togglePause() {
    this.isPaused = !this.isPaused;
    const pauseBtn = document.getElementById('c4ai-pause');
    if (this.isPaused) {
      pauseBtn.innerHTML = '<span class="c4ai-play-icon">▶</span> Resume';
      pauseBtn.classList.add('c4ai-paused');
      this.highlightBox.style.display = 'none';
    } else {
      pauseBtn.innerHTML = '<span class="c4ai-pause-icon">⏸</span> Pause';
      pauseBtn.classList.remove('c4ai-paused');
    }
  }

  /** Generates the Python code and shows it, provided a container and at least one field are selected. */
  stopAndGenerate() {
    if (!this.container || this.fields.length === 0) {
      alert('Please select a container and at least one field before generating code.');
      return;
    }

    const code = this.generateCode();
    this.showCodeModal(code);
  }

  /**
   * Moves the highlight box over `element` and styles it for the current mode.
   * @param {Element} element
   */
  highlightElement(element) {
    const rect = element.getBoundingClientRect();
    this.highlightBox.style.cssText = `
      left: ${rect.left + window.scrollX}px;
      top: ${rect.top + window.scrollY}px;
      width: ${rect.width}px;
      height: ${rect.height}px;
      display: block;
    `;

    const modeClass = this.mode === 'container' ? 'c4ai-container-mode' : 'c4ai-field-mode';
    this.highlightBox.className = `c4ai-highlight-box ${modeClass}`;
  }

  /**
   * Records `element` as the container and switches to field selection.
   * @param {Element} element
   */
  selectContainer(element) {
    // Remove previous container highlight
    if (this.container) {
      this.container.element.classList.remove('c4ai-selected-container');
    }

    this.container = {
      element,
      html: element.outerHTML,
      selector: this.generateSelector(element),
      tagName: element.tagName.toLowerCase(),
    };

    element.classList.add('c4ai-selected-container');
    this.mode = 'field';
    this.updateToolbar();
    this.updateStats();
  }

  /**
   * Handles a click in field mode: ignores the container itself and elements
   * outside it, deselects an already selected element, otherwise opens the
   * naming dialog.
   * @param {Element} element
   */
  selectField(element) {
    if (element === this.container.element) {
      return;
    }

    // Clicking a selected field again toggles it off.
    if (this.selectedElements.has(element)) {
      this.deselectField(element);
      return;
    }

    if (!this.container.element.contains(element)) {
      return;
    }

    this.showFieldDialog(element);
  }

  /**
   * Removes `element` from the selected fields and refreshes the UI.
   * @param {Element} element
   */
  deselectField(element) {
    this.fields = this.fields.filter((field) => field.element !== element);
    this.selectedElements.delete(element);
    element.classList.remove('c4ai-selected-field');

    this.updateToolbar();
    this.updateStats();
  }

  /**
   * Shows a small dialog below `element` asking for a field name.
   * Enter or "Save" stores the field; Escape or "Cancel" closes the dialog.
   * @param {Element} element
   */
  showFieldDialog(element) {
    const dialog = document.createElement('div');
    dialog.className = 'c4ai-field-dialog';

    const rect = element.getBoundingClientRect();
    dialog.style.cssText = `
      left: ${rect.left + window.scrollX}px;
      top: ${rect.bottom + window.scrollY + 10}px;
    `;

    // Page text is untrusted: truncate first, then escape before it reaches innerHTML.
    const fullText = element.textContent.trim();
    const previewText = fullText.length > 50 ? `${fullText.substring(0, 50)}...` : fullText;

    dialog.innerHTML = `
      <div class="c4ai-field-dialog-content">
        <h4>Name this field:</h4>
        <input type="text" id="c4ai-field-name" placeholder="e.g., title, price, description" autofocus>
        <div class="c4ai-field-preview">
          <strong>Content:</strong> ${window.C4AI_Utils.escapeHtml(previewText)}
        </div>
        <div class="c4ai-field-actions">
          <button id="c4ai-field-save">Save</button>
          <button id="c4ai-field-cancel">Cancel</button>
        </div>
      </div>
    `;

    document.body.appendChild(dialog);

    const input = dialog.querySelector('#c4ai-field-name');
    const saveBtn = dialog.querySelector('#c4ai-field-save');
    const cancelBtn = dialog.querySelector('#c4ai-field-cancel');

    const save = () => {
      const fieldName = input.value.trim();
      // The container check guards against a session that ended meanwhile.
      if (fieldName && this.container) {
        this.fields.push({
          name: fieldName,
          value: element.textContent.trim(),
          element,
          selector: this.generateSelector(element, this.container.element),
        });

        element.classList.add('c4ai-selected-field');
        this.selectedElements.add(element);
        this.updateToolbar();
        this.updateStats();
      }
      dialog.remove();
    };

    const cancel = () => {
      dialog.remove();
    };

    saveBtn.addEventListener('click', save);
    cancelBtn.addEventListener('click', cancel);
    // keydown, not keypress: keypress is not dispatched for Escape.
    input.addEventListener('keydown', (e) => {
      if (e.key === 'Enter') save();
      if (e.key === 'Escape') cancel();
    });

    input.focus();
  }

  /**
   * Builds a CSS selector for `element`, trying in order: id, a test/data
   * attribute, aria-label, the bare tag name for short-text headings, links
   * and buttons, a class combination that is unique within `context`, and
   * finally `parentTag > tag:nth-child(n)`.
   *
   * @param {Element} element - Element to describe.
   * @param {Document|Element} [context=document] - Scope for the uniqueness check.
   * @returns {string} A CSS selector (not guaranteed to be unique).
   */
  generateSelector(element, context = document) {
    if (element.id) {
      return `#${CSS.escape(element.id)}`;
    }

    // Escape a value for use inside a double-quoted CSS attribute string.
    const quoteAttr = (value) =>
      String(value)
        .replace(/["\\]/g, '\\$&')
        .replace(/\r?\n/g, '\\a ');

    // Check for data attributes (most stable)
    const dataAttrs = ['data-testid', 'data-id', 'data-test', 'data-cy'];
    for (const attr of dataAttrs) {
      const value = element.getAttribute(attr);
      if (value) {
        return `[${attr}="${quoteAttr(value)}"]`;
      }
    }

    const ariaLabel = element.getAttribute('aria-label');
    if (ariaLabel) {
      return `[aria-label="${quoteAttr(ariaLabel)}"]`;
    }

    // Headings, links and buttons with short text are addressed by tag name
    // alone; the text only gates this branch, it is not part of the selector.
    const tagName = element.tagName.toLowerCase();
    if (['button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) {
      const text = element.textContent.trim();
      if (text && text.length < 50) {
        return tagName;
      }
    }

    // Check for simple, non-utility classes
    const classes = Array.from(element.classList)
      .filter((c) => !c.startsWith('c4ai-')) // Exclude our classes
      .filter((c) => !c.includes('[') && !c.includes('(') && !c.includes(':')) // Exclude utility classes
      .filter((c) => c.length < 30); // Exclude very long classes

    if (classes.length > 0 && classes.length <= 3) {
      const selector = classes.map((c) => `.${CSS.escape(c)}`).join('');
      try {
        if (context.querySelectorAll(selector).length === 1) {
          return selector;
        }
      } catch (e) {
        // Invalid selector, fall through to the positional strategy.
      }
    }

    // Use nth-child with the parent's tag name only (no recursion).
    const parent = element.parentElement;
    if (parent && parent !== context) {
      const index = Array.from(parent.children).indexOf(element) + 1;
      const parentTag = parent.tagName.toLowerCase();
      return `${parentTag} > ${tagName}:nth-child(${index})`;
    }

    // Final fallback
    return tagName;
  }

  /** Refreshes the toolbar: mode, container status, selected fields and hint. */
  updateToolbar() {
    document.getElementById('c4ai-mode').textContent =
      this.mode === 'container' ? 'Select Container' : 'Select Fields';

    document.getElementById('c4ai-container').textContent =
      this.container ? `${this.container.tagName} ✓` : 'Not selected';

    const fieldsList = document.getElementById('c4ai-fields-list');
    const fieldsItems = document.getElementById('c4ai-fields-items');

    if (this.fields.length > 0) {
      fieldsList.style.display = 'block';
      // Names are typed by the user and values come from the page: escape both.
      fieldsItems.innerHTML = this.fields.map((field) => {
        const shortValue = `${field.value.substring(0, 30)}${field.value.length > 30 ? '...' : ''}`;
        return `
          <li class="c4ai-field-item">
            <span class="c4ai-field-name">${window.C4AI_Utils.escapeHtml(field.name)}</span>
            <span class="c4ai-field-value">${window.C4AI_Utils.escapeHtml(shortValue)}</span>
          </li>
        `;
      }).join('');
    } else {
      fieldsList.style.display = 'none';
    }

    const hint = document.getElementById('c4ai-hint');
    if (this.mode === 'container') {
      hint.textContent = 'Click on a container element (e.g., product card, article, etc.)';
    } else if (this.fields.length === 0) {
      hint.textContent = 'Click on fields inside the container to extract (title, price, etc.)';
    } else {
      // Names the button exactly as it is labelled in the toolbar.
      hint.innerHTML = 'Continue selecting fields or click <strong>Generate Code</strong> to finish.';
    }
  }

  /** Sends the current selection counts to the extension via `chrome.runtime.sendMessage`. */
  updateStats() {
    chrome.runtime.sendMessage({
      action: 'updateStats',
      stats: {
        container: !!this.container,
        fields: this.fields.length,
      },
    });
  }

  /** Strips the selection classes from every element on the page. */
  removeAllHighlights() {
    for (const className of ['c4ai-selected-container', 'c4ai-selected-field']) {
      document.querySelectorAll(`.${className}`).forEach((el) => {
        el.classList.remove(className);
      });
    }
  }

  /**
   * Produces the Python script for the current selection.
   *
   * Page-derived text (container HTML, field names/values, URL) is escaped
   * before being embedded in Python string literals, so backslashes or
   * quotes on the page cannot break or alter the generated program.
   *
   * @returns {string} Python source code.
   */
  generateCode() {
    // Safe inside a Python """...""" literal.
    const pyTriple = (text) =>
      String(text).replace(/\\/g, '\\\\').replace(/"""/g, '\\"\\"\\"');
    // Safe inside a Python "..." literal.
    const pyDouble = (text) =>
      String(text).replace(/\\/g, '\\\\').replace(/"/g, '\\"');

    const fieldDescriptions = this.fields.map((f) =>
      `- ${pyTriple(f.name)} (example: "${pyTriple(f.value.substring(0, 50))}...")`
    ).join('\n');

    const pageUrl = window.location.href;

    // Template lines start at column 0 on purpose: their leading whitespace
    // is the indentation of the generated Python.
    const code = `#!/usr/bin/env python3
"""
Generated by Crawl4AI Chrome Extension
URL: ${pyTriple(pageUrl)}
Generated: ${new Date().toISOString()}
"""

import asyncio
import json
from pathlib import Path
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

# HTML snippet of the selected container element
HTML_SNIPPET = """
${pyTriple(this.container.html)}
"""

# Extraction query based on your field selections
EXTRACTION_QUERY = """
Create a JSON CSS extraction schema to extract the following fields:
${fieldDescriptions}

The schema should handle multiple ${this.container.tagName} elements on the page.
Each item should be extracted as a separate object in the results array.
"""

async def generate_schema():
    """Generate extraction schema using LLM"""
    print("🔧 Generating extraction schema...")

    try:
        # Generate the schema using Crawl4AI's built-in LLM integration
        schema = JsonCssExtractionStrategy.generate_schema(
            html=HTML_SNIPPET,
            query=EXTRACTION_QUERY,
        )

        # Save the schema for reuse
        schema_path = Path('generated_schema.json')
        with open(schema_path, 'w') as f:
            json.dump(schema, f, indent=2)

        print("✅ Schema generated successfully!")
        print(f"📄 Schema saved to: {schema_path}")
        print("\\nGenerated schema:")
        print(json.dumps(schema, indent=2))

        return schema

    except Exception as e:
        print(f"❌ Error generating schema: {e}")
        return None

async def test_extraction(url: str = "${pyDouble(pageUrl)}"):
    """Test the generated schema on the actual webpage"""
    print("\\n🧪 Testing extraction on live webpage...")

    # Load the generated schema
    try:
        with open('generated_schema.json', 'r') as f:
            schema = json.load(f)
    except FileNotFoundError:
        print("❌ Schema file not found. Run generate_schema() first.")
        return

    # Configure browser
    browser_config = BrowserConfig(
        headless=True,
        verbose=False
    )

    # Configure extraction
    crawler_config = CrawlerRunConfig(
        extraction_strategy=JsonCssExtractionStrategy(schema=schema)
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url=url,
            config=crawler_config
        )

        if result.success and result.extracted_content:
            data = json.loads(result.extracted_content)
            print(f"\\n✅ Successfully extracted {len(data)} items!")

            # Save results
            with open('extracted_data.json', 'w') as f:
                json.dump(data, f, indent=2)

            # Show sample results
            print("\\n📊 Sample results (first 2 items):")
            for i, item in enumerate(data[:2], 1):
                print(f"\\nItem {i}:")
                for key, value in item.items():
                    print(f" {key}: {value}")
        else:
            print("❌ Extraction failed:", result.error_message)

if __name__ == "__main__":
    # Step 1: Generate the schema from HTML snippet
    asyncio.run(generate_schema())

    # Step 2: Test extraction on the live webpage
    # Uncomment the line below to test extraction:
    # asyncio.run(test_extraction())

    print("\\n🎯 Next steps:")
    print("1. Review the generated schema in 'generated_schema.json'")
    print("2. Uncomment the test_extraction() line to test on the live site")
    print("3. Use the schema in your Crawl4AI projects!")
`;

    return code;
  }

  /**
   * Shows the generated code in a modal with download and copy actions.
   * A modal that is already open is replaced, so element ids stay unique.
   * Closing the modal does not end the picking session.
   *
   * @param {string} code - Python source to display.
   */
  showCodeModal(code) {
    this.codeModal?.remove();

    const modal = document.createElement('div');
    this.codeModal = modal;
    modal.className = 'c4ai-code-modal';
    modal.innerHTML = `
      <div class="c4ai-code-modal-content">
        <div class="c4ai-code-modal-header">
          <h2>Generated Python Code</h2>
          <button class="c4ai-close-modal" id="c4ai-close-modal">✕</button>
        </div>
        <div class="c4ai-code-modal-body">
          <pre class="c4ai-code-block"><code class="language-python">${window.C4AI_Utils.escapeHtml(code)}</code></pre>
        </div>
        <div class="c4ai-code-modal-footer">
          <button class="c4ai-action-btn c4ai-cloud-btn" id="c4ai-run-cloud" disabled>
            <span>☁️</span> Run on C4AI Cloud (Coming Soon)
          </button>
          <button class="c4ai-action-btn c4ai-download-btn" id="c4ai-download-code">
            <span>⬇</span> Download Code
          </button>
          <button class="c4ai-action-btn c4ai-copy-btn" id="c4ai-copy-code">
            <span>📋</span> Copy to Clipboard
          </button>
        </div>
      </div>
    `;

    document.body.appendChild(modal);

    // Look the buttons up inside this modal rather than by document-wide id.
    const closeBtn = modal.querySelector('#c4ai-close-modal');
    const downloadBtn = modal.querySelector('#c4ai-download-code');
    const copyBtn = modal.querySelector('#c4ai-copy-code');

    closeBtn.addEventListener('click', () => {
      modal.remove();
      if (this.codeModal === modal) {
        this.codeModal = null;
      }
      // Don't stop the capture session
    });

    downloadBtn.addEventListener('click', () => {
      chrome.runtime.sendMessage({
        action: 'downloadCode',
        code,
        filename: `crawl4ai_schema_${Date.now()}.py`,
      }, (response) => {
        if (response && response.success) {
          const originalHTML = downloadBtn.innerHTML;
          downloadBtn.innerHTML = '<span>✓</span> Downloaded!';
          setTimeout(() => {
            downloadBtn.innerHTML = originalHTML;
          }, 2000);
        } else {
          console.error('Download failed:', response?.error);
          alert('Download failed. Please check your browser settings.');
        }
      });
    });

    copyBtn.addEventListener('click', () => {
      navigator.clipboard.writeText(code)
        .then(() => {
          copyBtn.innerHTML = '<span>✓</span> Copied!';
          setTimeout(() => {
            copyBtn.innerHTML = '<span>📋</span> Copy to Clipboard';
          }, 2000);
        })
        .catch((err) => {
          // Clipboard access can be denied; report it instead of failing silently.
          console.error('Failed to copy:', err);
          alert('Copy failed. Please select the code and copy it manually.');
        });
    });

    // Apply syntax highlighting
    window.C4AI_Utils.applySyntaxHighlighting(modal.querySelector('.language-python'));
  }
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -61,14 +61,14 @@
|
||||
<p>Transform any website into structured data with just a few clicks! The Crawl4AI Assistant Chrome Extension provides three powerful tools for web scraping and data extraction.</p>
|
||||
|
||||
<div style="background: #0fbbaa; color: #070708; padding: 12px 16px; border-radius: 8px; margin: 16px 0; font-weight: 600;">
|
||||
🎉 NEW: Schema Builder now extracts data INSTANTLY without any LLM! Test your schema and see JSON results immediately in the browser!
|
||||
🎉 NEW: Click2Crawl extracts data INSTANTLY without any LLM! Test your schema and see JSON results immediately in the browser!
|
||||
</div>
|
||||
|
||||
<div class="features-grid">
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">🎯</span>
|
||||
<h3>Schema Builder</h3>
|
||||
<p>Extract data instantly without LLMs - see results in real-time!</p>
|
||||
<h3>Click2Crawl</h3>
|
||||
<p>Visual data extraction - click elements to build schemas instantly!</p>
|
||||
</div>
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">🔴</span>
|
||||
@@ -77,8 +77,8 @@
|
||||
</div>
|
||||
<div class="feature-card">
|
||||
<span class="feature-icon">📝</span>
|
||||
<h3>Click2Crawl <span style="color: #0fbbaa; font-size: 0.75rem;">(New!)</span></h3>
|
||||
<p>Select multiple elements to extract clean markdown "as you see"</p>
|
||||
<h3>Markdown Extraction <span style="color: #0fbbaa; font-size: 0.75rem;">(New!)</span></h3>
|
||||
<p>Convert any webpage content to clean markdown with Visual Text Mode</p>
|
||||
</div>
|
||||
<!-- <div class="feature-card">
|
||||
<span class="feature-icon">🐍</span>
|
||||
@@ -104,9 +104,9 @@
|
||||
<div class="step-content">
|
||||
<h4>Download the Extension</h4>
|
||||
<p>Get the latest release from GitHub or use the button below</p>
|
||||
<a href="crawl4ai-assistant-v1.2.1.zip" class="download-button" download>
|
||||
<a href="crawl4ai-assistant-v1.3.0.zip" class="download-button" download>
|
||||
<span class="button-icon">↓</span>
|
||||
Download Extension (v1.2.1)
|
||||
Download Extension (v1.3.0)
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
@@ -136,10 +136,10 @@
|
||||
<div class="tools-container">
|
||||
<!-- Left Panel - Tool Selector -->
|
||||
<div class="tools-panel">
|
||||
<div class="tool-selector active" data-tool="schema-builder">
|
||||
<div class="tool-icon">📊</div>
|
||||
<div class="tool-selector active" data-tool="click2crawl">
|
||||
<div class="tool-icon">🎯</div>
|
||||
<div class="tool-info">
|
||||
<h3>Schema Builder</h3>
|
||||
<h3>Click2Crawl</h3>
|
||||
<p>Visual data extraction</p>
|
||||
</div>
|
||||
<div class="tool-status">Available</div>
|
||||
@@ -154,11 +154,11 @@
|
||||
<div class="tool-status alpha">Alpha</div>
|
||||
</div>
|
||||
|
||||
<div class="tool-selector" data-tool="click2crawl">
|
||||
<div class="tool-selector" data-tool="markdown-extraction">
|
||||
<div class="tool-icon">📝</div>
|
||||
<div class="tool-info">
|
||||
<h3>Click2Crawl</h3>
|
||||
<p>Markdown extraction</p>
|
||||
<h3>Markdown Extraction</h3>
|
||||
<p>Content to markdown</p>
|
||||
</div>
|
||||
<div class="tool-status new">New!</div>
|
||||
</div>
|
||||
@@ -166,11 +166,11 @@
|
||||
|
||||
<!-- Right Panel - Tool Details -->
|
||||
<div class="tool-details">
|
||||
<!-- Schema Builder Details -->
|
||||
<div class="tool-content active" id="schema-builder">
|
||||
<!-- Click2Crawl Details -->
|
||||
<div class="tool-content active" id="click2crawl">
|
||||
<div class="tool-header">
|
||||
<h3>📊 Schema Builder</h3>
|
||||
<span class="tool-tagline">No LLM needed - Extract data instantly!</span>
|
||||
<h3>🎯 Click2Crawl</h3>
|
||||
<span class="tool-tagline">Click elements to build extraction schemas - No LLM needed!</span>
|
||||
</div>
|
||||
|
||||
<div class="tool-steps">
|
||||
@@ -199,8 +199,8 @@
|
||||
<div class="step-item">
|
||||
<div class="step-number">3</div>
|
||||
<div class="step-content">
|
||||
<h4>Test & Extract Data NOW!</h4>
|
||||
<p>🎉 Click "Test Schema" to extract ALL matching data instantly - no coding required!</p>
|
||||
<h4>Test & Extract Data Instantly!</h4>
|
||||
<p>🎉 Click "Test Schema" to see extracted JSON immediately - no LLM or coding required!</p>
|
||||
<div class="step-visual">
|
||||
<span class="highlight-accent">⚡</span> See extracted JSON immediately
|
||||
</div>
|
||||
@@ -210,11 +210,12 @@
|
||||
|
||||
<div class="tool-features">
|
||||
<div class="feature-tag">🚀 Zero LLM dependency</div>
|
||||
<div class="feature-tag">📊 Instant data extraction</div>
|
||||
<div class="feature-tag">🎯 Smart selector generation</div>
|
||||
<div class="feature-tag">🐍 Ready-to-run Python code</div>
|
||||
<div class="feature-tag">✨ Preview matching elements</div>
|
||||
<div class="feature-tag">📥 Download JSON results</div>
|
||||
<div class="feature-tag">📊 Instant JSON extraction</div>
|
||||
<div class="feature-tag">🎯 Visual element selection</div>
|
||||
<div class="feature-tag">🐍 Export Python code</div>
|
||||
<div class="feature-tag">✨ Live preview</div>
|
||||
<div class="feature-tag">📥 Download results</div>
|
||||
<div class="feature-tag">📝 Export to markdown</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -268,11 +269,11 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Click2Crawl Details -->
|
||||
<div class="tool-content" id="click2crawl">
|
||||
<!-- Markdown Extraction Details -->
|
||||
<div class="tool-content" id="markdown-extraction">
|
||||
<div class="tool-header">
|
||||
<h3>📝 Click2Crawl</h3>
|
||||
<span class="tool-tagline">Select multiple elements to extract clean markdown</span>
|
||||
<h3>📝 Markdown Extraction</h3>
|
||||
<span class="tool-tagline">Convert webpage content to clean markdown "as you see"</span>
|
||||
</div>
|
||||
|
||||
<div class="tool-steps">
|
||||
@@ -312,9 +313,9 @@
|
||||
|
||||
<div class="tool-features">
|
||||
<div class="feature-tag">Multi-select with Ctrl/Cmd</div>
|
||||
<div class="feature-tag">Visual Text Mode</div>
|
||||
<div class="feature-tag">Smart formatting</div>
|
||||
<div class="feature-tag">Cloud export (soon)</div>
|
||||
<div class="feature-tag">Visual Text Mode (As You See)</div>
|
||||
<div class="feature-tag">Clean markdown output</div>
|
||||
<div class="feature-tag">Export to Crawl4AI Cloud (soon)</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -326,26 +327,26 @@
|
||||
<h2>See the Generated Code & Extracted Data</h2>
|
||||
|
||||
<div class="code-tabs">
|
||||
<button class="code-tab active" data-example="schema">📊 Schema Builder</button>
|
||||
<button class="code-tab active" data-example="schema">🎯 Click2Crawl</button>
|
||||
<button class="code-tab" data-example="script">🔴 Script Builder</button>
|
||||
<button class="code-tab" data-example="markdown">📝 Click2Crawl</button>
|
||||
<button class="code-tab" data-example="markdown">📝 Markdown Extraction</button>
|
||||
</div>
|
||||
|
||||
<div class="code-examples">
|
||||
<!-- Schema Builder Code -->
|
||||
<!-- Click2Crawl Code -->
|
||||
<div class="code-example active" id="code-schema">
|
||||
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px;">
|
||||
<!-- Python Code -->
|
||||
<div class="terminal-window">
|
||||
<div class="terminal-header">
|
||||
<span class="terminal-title">schema_extraction.py</span>
|
||||
<span class="terminal-title">click2crawl_extraction.py</span>
|
||||
<button class="copy-button" data-code="schema-python">Copy</button>
|
||||
</div>
|
||||
<div class="terminal-content">
|
||||
<pre><code><span class="comment">#!/usr/bin/env python3</span>
|
||||
<span class="comment">"""
|
||||
🎉 NO LLM NEEDED! Direct extraction with CSS selectors
|
||||
Generated by Crawl4AI Chrome Extension
|
||||
Generated by Crawl4AI Chrome Extension - Click2Crawl
|
||||
"""</span>
|
||||
|
||||
<span class="keyword">import</span> asyncio
|
||||
@@ -353,7 +354,7 @@ Generated by Crawl4AI Chrome Extension
|
||||
<span class="keyword">from</span> crawl4ai <span class="keyword">import</span> AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
|
||||
<span class="keyword">from</span> crawl4ai.extraction_strategy <span class="keyword">import</span> JsonCssExtractionStrategy
|
||||
|
||||
<span class="comment"># The EXACT schema from your visual clicks - no guessing!</span>
|
||||
<span class="comment"># The EXACT schema from Click2Crawl - no guessing!</span>
|
||||
EXTRACTION_SCHEMA = {
|
||||
<span class="string">"name"</span>: <span class="string">"Product Catalog"</span>,
|
||||
<span class="string">"baseSelector"</span>: <span class="string">"div.product-card"</span>, <span class="comment"># The container you selected</span>
|
||||
@@ -515,7 +516,7 @@ asyncio.run(automate_shopping())</code></pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Click2Crawl Markdown Output -->
|
||||
<!-- Markdown Extraction Output -->
|
||||
<div class="code-example" id="code-markdown">
|
||||
<div class="terminal-window">
|
||||
<div class="terminal-header">
|
||||
@@ -692,20 +693,20 @@ Today, finding a 24-hour restaurant in Manhattan requires genuine effort. The pa
|
||||
<div class="coming-feature">
|
||||
<div class="feature-header">
|
||||
<span class="feature-badge">Direct</span>
|
||||
<h3>Get CrawlResult Without Code</h3>
|
||||
<h3>Direct Data Download</h3>
|
||||
</div>
|
||||
<p>Skip the code generation entirely! Get extracted data directly in the extension as a CrawlResult object, ready to download as JSON.</p>
|
||||
<p>Skip the code generation entirely! Download extracted data directly from Click2Crawl as JSON or CSV files.</p>
|
||||
<div class="feature-preview">
|
||||
<code>📊 One-click extraction • No Python needed • Export to JSON/CSV</code>
|
||||
<code>📊 One-click download • No Python needed • Multiple export formats</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="coming-feature">
|
||||
<div class="feature-header">
|
||||
<span class="feature-badge">AI</span>
|
||||
<h3>Smart Schema Suggestions</h3>
|
||||
<h3>Smart Field Detection</h3>
|
||||
</div>
|
||||
<p>AI-powered field detection that automatically suggests the most likely data fields on any page, making schema building even faster.</p>
|
||||
<p>AI-powered field detection for Click2Crawl that automatically suggests the most likely data fields on any page.</p>
|
||||
<div class="feature-preview">
|
||||
<code>🤖 Auto-detect fields • Smart naming • Pattern recognition</code>
|
||||
</div>
|
||||
@@ -758,7 +759,10 @@ Today, finding a 24-hour restaurant in Manhattan requires genuine effort. The pa
|
||||
|
||||
// Show corresponding content
|
||||
const toolId = this.getAttribute('data-tool');
|
||||
document.getElementById(toolId).classList.add('active');
|
||||
const contentElement = document.getElementById(toolId);
|
||||
if (contentElement) {
|
||||
contentElement.classList.add('active');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"manifest_version": 3,
|
||||
"name": "Crawl4AI Assistant",
|
||||
"version": "1.2.1",
|
||||
"version": "1.3.0",
|
||||
"description": "Visual schema and script builder for Crawl4AI - Build extraction schemas and automation scripts by clicking and recording actions",
|
||||
"permissions": [
|
||||
"activeTab",
|
||||
@@ -25,11 +25,12 @@
|
||||
"js": [
|
||||
"libs/marked.min.js",
|
||||
"content/shared/utils.js",
|
||||
"content/schemaBuilder.js",
|
||||
"content/markdownPreviewModal.js",
|
||||
"content/click2crawl.js",
|
||||
"content/scriptBuilder.js",
|
||||
"content/contentAnalyzer.js",
|
||||
"content/markdownConverter.js",
|
||||
"content/click2CrawlBuilder.js",
|
||||
"content/markdownExtraction.js",
|
||||
"content/content.js"
|
||||
],
|
||||
"css": ["content/overlay.css"],
|
||||
|
||||
@@ -38,6 +38,8 @@ body {
|
||||
font-family: var(--font-primary);
|
||||
background: #0a0a0a;
|
||||
color: #e0e0e0;
|
||||
border-radius: 16px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.popup-container {
|
||||
|
||||
@@ -23,15 +23,15 @@
|
||||
|
||||
<div class="mode-selector">
|
||||
<button id="schema-mode" class="mode-button schema">
|
||||
<div class="icon">📊</div>
|
||||
<div class="icon">🎯</div>
|
||||
<div class="mode-info">
|
||||
<h3>Schema Builder</h3>
|
||||
<h3>Click2Crawl</h3>
|
||||
<p>Click elements to build extraction schemas</p>
|
||||
</div>
|
||||
</button>
|
||||
|
||||
<button id="script-mode" class="mode-button script">
|
||||
<div class="icon">🎯</div>
|
||||
<div class="icon">🔴</div>
|
||||
<div class="mode-info">
|
||||
<h3>Script Builder <span style="color: #ff3c74; font-size: 10px;">(Alpha)</span></h3>
|
||||
<p>Record actions to build automation scripts</p>
|
||||
@@ -39,9 +39,9 @@
|
||||
</button>
|
||||
|
||||
<button id="c2c-mode" class="mode-button c2c">
|
||||
<div class="icon">✨</div>
|
||||
<div class="icon">📝</div>
|
||||
<div class="mode-info">
|
||||
<h3>Click2Crawl</h3>
|
||||
<h3>Markdown Extraction</h3>
|
||||
<p>Select elements and convert to clean markdown</p>
|
||||
</div>
|
||||
</button>
|
||||
@@ -72,10 +72,10 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="instructions">
|
||||
<div class="instructions" style="display: none;">
|
||||
<h4>How to use:</h4>
|
||||
<ol>
|
||||
<li>Click "Schema Builder" to start</li>
|
||||
<li>Click "Click2Crawl" to start</li>
|
||||
<li>Click on a container element (e.g., product card)</li>
|
||||
<li>Click individual fields inside and name them</li>
|
||||
<li>Generate Python code when done</li>
|
||||
|
||||
Reference in New Issue
Block a user