diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 616bc6dd..52e79a4f 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -20,7 +20,8 @@ "Bash(docker logs:*)", "Bash(curl:*)", "Bash(docker compose:*)", - "Bash(./test-final-integration.sh:*)" + "Bash(./test-final-integration.sh:*)", + "Bash(mv:*)" ] }, "enableAllProjectMcpServers": false diff --git a/docs/md_v2/apps/crawl4ai-assistant/README.md b/docs/md_v2/apps/crawl4ai-assistant/README.md index 9d3841f1..9d6f4a60 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/README.md +++ b/docs/md_v2/apps/crawl4ai-assistant/README.md @@ -1,14 +1,15 @@ # Crawl4AI Chrome Extension -Visual schema and script builder for Crawl4AI - Build extraction schemas by clicking on webpage elements! +Visual extraction tools for Crawl4AI - Click to extract data and content from any webpage! ## ๐Ÿš€ Features -- **Visual Schema Builder**: Click on elements to build extraction schemas +- **Click2Crawl**: Click on elements to build data extraction schemas instantly +- **Markdown Extraction**: Select elements and export as clean markdown +- **Script Builder (Alpha)**: Record browser actions to create automation scripts - **Smart Element Selection**: Container and field selection with visual feedback -- **Code Generation**: Generates complete Python code with LLM integration +- **Code Generation**: Generates complete Python code for Crawl4AI - **Beautiful Dark UI**: Consistent with Crawl4AI's design language -- **One-Click Download**: Get your generated code instantly ## ๐Ÿ“ฆ Installation @@ -33,11 +34,11 @@ If you want proper icons: ## ๐ŸŽฏ How to Use -### Building a Schema +### Using Click2Crawl 1. **Navigate to any website** you want to extract data from 2. **Click the Crawl4AI extension icon** in your toolbar -3. **Click "Schema Builder"** to start the capture mode +3. **Click "Click2Crawl"** to start the capture mode 4. 
**Select a container element**: - Hover over elements (they'll highlight in blue) - Click on a repeating container (e.g., product card, article block) @@ -45,9 +46,9 @@ If you want proper icons: - Elements will now highlight in green - Click on each piece of data you want to extract - Name each field (e.g., "title", "price", "description") -6. **Generate the code**: - - Click "Generate Code" in the extension popup - - A Python file will automatically download +6. **Test and Export**: + - Click "Test Schema" to see extracted data instantly + - Export as Python code, JSON schema, or markdown ### Running the Generated Code diff --git a/docs/md_v2/apps/crawl4ai-assistant/content/schemaBuilder.js b/docs/md_v2/apps/crawl4ai-assistant/content/click2crawl.js similarity index 91% rename from docs/md_v2/apps/crawl4ai-assistant/content/schemaBuilder.js rename to docs/md_v2/apps/crawl4ai-assistant/content/click2crawl.js index 7bbb7a80..0c3c37b9 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/content/schemaBuilder.js +++ b/docs/md_v2/apps/crawl4ai-assistant/content/click2crawl.js @@ -1,15 +1,16 @@ -// Enhanced SchemaBuilder class for Crawl4AI Chrome Extension +// Click2Crawl class for Crawl4AI Chrome Extension +// Click elements to build extraction schemas // Singleton instance to prevent multiple toolbars -let schemaBuilderInstance = null; +let click2CrawlInstance = null; -class SchemaBuilder { +class Click2Crawl { constructor() { // Prevent multiple instances - if (schemaBuilderInstance) { - schemaBuilderInstance.stop(); + if (click2CrawlInstance) { + click2CrawlInstance.stop(); } - schemaBuilderInstance = this; + click2CrawlInstance = this; this.container = null; this.fields = []; @@ -57,9 +58,15 @@ class SchemaBuilder { this.inspectingFields = false; this.parentLevels = 1; + // Clean up markdown preview modal + if (this.markdownPreviewModal) { + this.markdownPreviewModal.destroy(); + this.markdownPreviewModal = null; + } + // Clear singleton reference - if 
(schemaBuilderInstance === this) { - schemaBuilderInstance = null; + if (click2CrawlInstance === this) { + click2CrawlInstance = null; } } @@ -97,8 +104,8 @@ class SchemaBuilder { - Crawl4AI -
🔧 Schema Builder
+
Click2Crawl
+ Crawl4AI
@@ -151,6 +158,9 @@ class SchemaBuilder { +
@@ -202,6 +212,7 @@ class SchemaBuilder { addClickHandler('c4ai-test', () => this.testSchema()); addClickHandler('c4ai-export-schema', () => this.exportSchema()); addClickHandler('c4ai-export-data', () => this.exportData()); + addClickHandler('c4ai-export-markdown', () => this.exportMarkdown()); addClickHandler('c4ai-deploy-cloud', () => this.deployToCloud()); addClickHandler('c4ai-close', () => this.stop()); @@ -273,10 +284,15 @@ class SchemaBuilder { handleClick(e) { const element = e.target; - // Check if clicking on our UI elements + // Check if clicking on our UI elements (including markdown preview modal) if (this.isOurElement(element)) { return; // Let toolbar clicks work normally } + + // Additional check for markdown preview modal classes + if (element.closest('.c4ai-c2c-preview') || element.closest('.c4ai-preview-options')) { + return; // Don't interfere with markdown preview modal + } // Use current element const targetElement = this.currentElement || element; @@ -303,7 +319,9 @@ class SchemaBuilder { isOurElement(element) { return window.C4AI_Utils.isOurElement(element) || - (this.selectedBox && element === this.selectedBox); + (this.selectedBox && element === this.selectedBox) || + (this.markdownPreviewModal && this.markdownPreviewModal.modal && + (element === this.markdownPreviewModal.modal || this.markdownPreviewModal.modal.contains(element))); } showSelectedBox(element) { @@ -499,6 +517,9 @@ class SchemaBuilder { } showFieldDialog(element) { + // Remove any existing field dialogs first + document.querySelectorAll('.c4ai-field-dialog').forEach(d => d.remove()); + const dialog = document.createElement('div'); dialog.className = 'c4ai-field-dialog'; @@ -922,6 +943,7 @@ class SchemaBuilder { document.getElementById('c4ai-test').disabled = false; document.getElementById('c4ai-export-schema').disabled = false; document.getElementById('c4ai-export-data').disabled = false; + document.getElementById('c4ai-export-markdown').disabled = false; 
document.getElementById('c4ai-deploy-cloud').disabled = false; } else { schemaSection.style.display = 'none'; @@ -976,6 +998,9 @@ class SchemaBuilder { const field = this.fields[index]; if (!field) return; + // Remove any existing field dialogs first + document.querySelectorAll('.c4ai-field-dialog').forEach(d => d.remove()); + // Re-show the field dialog with existing values const dialog = document.createElement('div'); dialog.className = 'c4ai-field-dialog'; @@ -1476,6 +1501,137 @@ class SchemaBuilder { await this.testSchema(); } + async exportMarkdown() { + // Initialize markdown converter if not already done + if (!this.markdownConverter) { + this.markdownConverter = new MarkdownConverter(); + } + if (!this.contentAnalyzer) { + this.contentAnalyzer = new ContentAnalyzer(); + } + + // Initialize markdown preview modal if not already done + if (!this.markdownPreviewModal) { + this.markdownPreviewModal = new MarkdownPreviewModal(); + } + + // Get all matching containers + const containers = document.querySelectorAll(this.container.selector); + if (containers.length === 0) { + this.showNotification('No matching containers found', 'error'); + return; + } + + // Show modal with callback to generate markdown + this.markdownPreviewModal.show(async (options) => { + return await this.generateMarkdownFromSchema(options); + }); + } + + + + + + async generateMarkdownFromSchema(options) { + // Get all matching containers + const containers = document.querySelectorAll(this.container.selector); + const markdownParts = []; + + for (let i = 0; i < containers.length; i++) { + const container = containers[i]; + + // Add XPath header if enabled + if (options.includeXPath) { + const xpath = this.getXPath(container); + markdownParts.push(`### Container ${i + 1} - XPath: \`${xpath}\`\n`); + } + + // Extract data based on schema fields + const extractedData = {}; + this.fields.forEach(field => { + try { + const element = container.querySelector(field.selector); + if (element) { + if 
(field.type === 'text') { + extractedData[field.name] = element.textContent.trim(); + } else if (field.type === 'attribute' && field.attribute) { + extractedData[field.name] = element.getAttribute(field.attribute); + } + } + } catch (e) { + // Skip invalid selectors + } + }); + + // Convert container to markdown based on options + const analysis = await this.contentAnalyzer.analyze([container]); + const containerMarkdown = await this.markdownConverter.convert([container], { + ...options, + analysis, + extractedData // Pass extracted data for context + }); + + // Trim the markdown before adding + const trimmedMarkdown = containerMarkdown.trim(); + markdownParts.push(trimmedMarkdown); + + // Add separator if enabled and not last element + if (options.addSeparators && i < containers.length - 1) { + markdownParts.push('\n---\n'); + } + } + + return markdownParts.join('\n'); + } + + getXPath(element) { + if (element.id) { + return `//*[@id="${element.id}"]`; + } + + const parts = []; + let current = element; + + while (current && current.nodeType === Node.ELEMENT_NODE) { + let index = 0; + let sibling = current.previousSibling; + + while (sibling) { + if (sibling.nodeType === Node.ELEMENT_NODE && sibling.nodeName === current.nodeName) { + index++; + } + sibling = sibling.previousSibling; + } + + const tagName = current.nodeName.toLowerCase(); + const part = index > 0 ? 
`${tagName}[${index + 1}]` : tagName; + parts.unshift(part); + + current = current.parentNode; + } + + return '/' + parts.join('/'); + } + + + + showNotification(message, type = 'success') { + const notification = document.createElement('div'); + notification.className = `c4ai-notification c4ai-notification-${type}`; + notification.textContent = message; + + document.body.appendChild(notification); + + // Animate in + setTimeout(() => notification.classList.add('show'), 10); + + // Remove after 3 seconds + setTimeout(() => { + notification.classList.remove('show'); + setTimeout(() => notification.remove(), 300); + }, 3000); + } + deployToCloud() { // Create cloud deployment modal const modal = document.createElement('div'); @@ -1808,5 +1964,5 @@ if __name__ == "__main__": // Export for use in content script if (typeof window !== 'undefined') { - window.SchemaBuilder = SchemaBuilder; + window.Click2Crawl = Click2Crawl; } \ No newline at end of file diff --git a/docs/md_v2/apps/crawl4ai-assistant/content/content.js b/docs/md_v2/apps/crawl4ai-assistant/content/content.js index f20efe3b..a4f63a99 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/content/content.js +++ b/docs/md_v2/apps/crawl4ai-assistant/content/content.js @@ -1,5 +1,5 @@ // Main content script for Crawl4AI Assistant -// Coordinates between SchemaBuilder and ScriptBuilder +// Coordinates between Click2Crawl, ScriptBuilder, and MarkdownExtraction let activeBuilder = null; @@ -13,8 +13,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { } if (request.mode === 'schema') { - console.log('Starting Schema Builder'); - activeBuilder = new SchemaBuilder(); + console.log('Starting Click2Crawl'); + activeBuilder = new Click2Crawl(); activeBuilder.start(); } else if (request.mode === 'script') { console.log('Starting Script Builder'); @@ -34,8 +34,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { activeBuilder.deactivate?.(); activeBuilder = null; } - 
console.log('Starting Schema Builder'); - activeBuilder = new SchemaBuilder(); + console.log('Starting Click2Crawl'); + activeBuilder = new Click2Crawl(); activeBuilder.start(); sendResponse({ success: true }); } else if (request.action === 'startScriptCapture') { @@ -52,8 +52,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { activeBuilder.deactivate?.(); activeBuilder = null; } - console.log('Starting Click2Crawl'); - activeBuilder = new Click2CrawlBuilder(); + console.log('Starting Markdown Extraction'); + activeBuilder = new MarkdownExtraction(); sendResponse({ success: true }); } else if (request.action === 'generateCode') { if (activeBuilder && activeBuilder.generateCode) { diff --git a/docs/md_v2/apps/crawl4ai-assistant/content/click2CrawlBuilder.js b/docs/md_v2/apps/crawl4ai-assistant/content/markdownExtraction.js similarity index 90% rename from docs/md_v2/apps/crawl4ai-assistant/content/click2CrawlBuilder.js rename to docs/md_v2/apps/crawl4ai-assistant/content/markdownExtraction.js index 4fdcc40e..6e4cf393 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/content/click2CrawlBuilder.js +++ b/docs/md_v2/apps/crawl4ai-assistant/content/markdownExtraction.js @@ -1,26 +1,14 @@ -class Click2CrawlBuilder { +class MarkdownExtraction { constructor() { this.selectedElements = new Set(); this.highlightBoxes = new Map(); this.selectionMode = false; this.toolbar = null; - this.previewPanel = null; + this.markdownPreviewModal = null; this.selectionCounter = 0; this.markdownConverter = null; this.contentAnalyzer = null; - // Configuration options - this.options = { - includeImages: true, - preserveTables: true, - keepCodeFormatting: true, - simplifyLayout: false, - preserveLinks: true, - addSeparators: true, - includeXPath: false, - textOnly: false - }; - this.init(); } @@ -44,7 +32,7 @@ class Click2CrawlBuilder { - Click2Crawl + Markdown Extraction
@@ -363,19 +351,18 @@ class Click2CrawlBuilder { } async showPreview() { - // Generate markdown from selected elements - const markdown = await this.generateMarkdown(); - - // Create or update preview panel - if (!this.previewPanel) { - this.createPreviewPanel(); + // Initialize markdown preview modal if not already done + if (!this.markdownPreviewModal) { + this.markdownPreviewModal = new MarkdownPreviewModal(); } - await this.updatePreviewContent(markdown); - this.previewPanel.style.display = 'block'; + // Show modal with callback to generate markdown + this.markdownPreviewModal.show(async (options) => { + return await this.generateMarkdown(options); + }); } - createPreviewPanel() { + /* createPreviewPanel() { this.previewPanel = document.createElement('div'); this.previewPanel.className = 'c4ai-c2c-preview'; this.previewPanel.innerHTML = ` @@ -425,9 +412,9 @@ class Click2CrawlBuilder { this.previewPanel.style.zIndex = '999999'; this.setupPreviewEventListeners(); - } + } */ - setupPreviewEventListeners() { + /* setupPreviewEventListeners() { // Close button this.previewPanel.querySelector('.c4ai-preview-close').addEventListener('click', () => { this.previewPanel.style.display = 'none'; @@ -496,9 +483,9 @@ class Click2CrawlBuilder { this.previewPanel.querySelector('.c4ai-download-btn').addEventListener('click', () => { this.downloadMarkdown(); }); - } + } */ - switchPreviewTab(tabName) { + /* switchPreviewTab(tabName) { // Update active tab this.previewPanel.querySelectorAll('.c4ai-tab').forEach(tab => { tab.classList.toggle('active', tab.dataset.tab === tabName); @@ -508,9 +495,9 @@ class Click2CrawlBuilder { this.previewPanel.querySelectorAll('.c4ai-preview-pane').forEach(pane => { pane.classList.toggle('active', pane.dataset.pane === tabName); }); - } + } */ - async updatePreviewContent(markdown) { + /* async updatePreviewContent(markdown) { // Update markdown pane const markdownPane = this.previewPanel.querySelector('[data-pane="markdown"]'); 
markdownPane.innerHTML = `
${this.escapeHtml(markdown)}
`; @@ -535,19 +522,19 @@ class Click2CrawlBuilder { // Fallback if marked.js is not available previewPane.innerHTML = `
${this.escapeHtml(markdown)}
`; } - } + } */ - escapeHtml(unsafe) { + /* escapeHtml(unsafe) { return unsafe .replace(/&/g, "&") .replace(//g, ">") .replace(/"/g, """) .replace(/'/g, "'"); - } + } */ - async generateMarkdown() { + async generateMarkdown(options) { // Get selected elements as array const elements = Array.from(this.selectedElements); @@ -565,7 +552,7 @@ class Click2CrawlBuilder { const element = sortedElements[i]; // Add XPath header if enabled - if (this.options.includeXPath) { + if (options.includeXPath) { const xpath = this.getXPath(element); markdownParts.push(`### Element ${i + 1} - XPath: \`${xpath}\`\n`); } @@ -574,7 +561,7 @@ class Click2CrawlBuilder { let elementsToConvert = [element]; // If text-only mode and element is a TR, process the entire table for better context - if (this.options.textOnly && element.tagName === 'TR') { + if (options.textOnly && element.tagName === 'TR') { const table = element.closest('table'); if (table && !sortedElements.includes(table)) { // Only include this table row, not the whole table @@ -585,19 +572,21 @@ class Click2CrawlBuilder { // Analyze and convert individual element const analysis = await this.contentAnalyzer.analyze(elementsToConvert); const markdown = await this.markdownConverter.convert(elementsToConvert, { - ...this.options, + ...options, analysis }); - markdownParts.push(markdown.trim()); + // Trim the markdown before adding + const trimmedMarkdown = markdown.trim(); + markdownParts.push(trimmedMarkdown); // Add separator if enabled and not last element - if (this.options.addSeparators && i < sortedElements.length - 1) { - markdownParts.push('\n\n---\n\n'); + if (options.addSeparators && i < sortedElements.length - 1) { + markdownParts.push('\n---\n'); } } - return markdownParts.join('\n\n'); + return markdownParts.join('\n'); } getXPath(element) { @@ -642,35 +631,15 @@ class Click2CrawlBuilder { } async copyToClipboard() { - const markdown = await this.generateMarkdown(); - - try { - await 
navigator.clipboard.writeText(markdown); - this.showNotification('Markdown copied to clipboard!'); - } catch (err) { - console.error('Failed to copy:', err); - this.showNotification('Failed to copy. Please try again.', 'error'); + if (this.markdownPreviewModal) { + await this.markdownPreviewModal.copyToClipboard(); } } async downloadMarkdown() { - const markdown = await this.generateMarkdown(); - const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5); - const filename = `crawl4ai-export-${timestamp}.md`; - - // Create blob and download - const blob = new Blob([markdown], { type: 'text/markdown' }); - const url = URL.createObjectURL(blob); - - const a = document.createElement('a'); - a.href = url; - a.download = filename; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - - this.showNotification(`Downloaded ${filename}`); + if (this.markdownPreviewModal) { + await this.markdownPreviewModal.downloadMarkdown(); + } } showNotification(message, type = 'success') { @@ -707,9 +676,9 @@ class Click2CrawlBuilder { this.toolbar = null; } - if (this.previewPanel) { - this.previewPanel.remove(); - this.previewPanel = null; + if (this.markdownPreviewModal) { + this.markdownPreviewModal.destroy(); + this.markdownPreviewModal = null; } // Remove hover styles @@ -726,7 +695,7 @@ class Click2CrawlBuilder { } } catch (error) { // Extension context might be invalidated, ignore the error - console.log('Click2Crawl deactivated (extension context unavailable)'); + console.log('Markdown Extraction deactivated (extension context unavailable)'); } } } \ No newline at end of file diff --git a/docs/md_v2/apps/crawl4ai-assistant/content/markdownPreviewModal.js b/docs/md_v2/apps/crawl4ai-assistant/content/markdownPreviewModal.js new file mode 100644 index 00000000..1cd1bca3 --- /dev/null +++ b/docs/md_v2/apps/crawl4ai-assistant/content/markdownPreviewModal.js @@ -0,0 +1,300 @@ +// Shared Markdown Preview Modal 
Component for Crawl4AI Assistant +// Used by both Click2Crawl and MarkdownExtraction + +class MarkdownPreviewModal { + constructor(options = {}) { + this.modal = null; + this.markdownOptions = { + includeImages: true, + preserveTables: true, + keepCodeFormatting: true, + simplifyLayout: false, + preserveLinks: true, + addSeparators: true, + includeXPath: false, + textOnly: false, + ...options + }; + this.onGenerateMarkdown = null; + this.currentMarkdown = ''; + } + + show(generateMarkdownCallback) { + this.onGenerateMarkdown = generateMarkdownCallback; + + if (!this.modal) { + this.createModal(); + } + + // Generate initial markdown + this.updateContent(); + this.modal.style.display = 'block'; + } + + hide() { + if (this.modal) { + this.modal.style.display = 'none'; + } + } + + createModal() { + this.modal = document.createElement('div'); + this.modal.className = 'c4ai-c2c-preview'; + this.modal.innerHTML = ` +
+
+ + + +
+ Markdown Preview + +
+
+ + + + + + + + +
+
+
+ + + +
+
+
+
+
+ + + +
+ `; + + document.body.appendChild(this.modal); + + // Make modal draggable + if (window.C4AI_Utils && window.C4AI_Utils.makeDraggable) { + window.C4AI_Utils.makeDraggable(this.modal); + } + + // Position preview modal + this.modal.style.position = 'fixed'; + this.modal.style.top = '50%'; + this.modal.style.left = '50%'; + this.modal.style.transform = 'translate(-50%, -50%)'; + this.modal.style.zIndex = '999999'; + + this.setupEventListeners(); + } + + setupEventListeners() { + // Close button + this.modal.querySelector('.c4ai-preview-close').addEventListener('click', () => { + this.hide(); + }); + + // Tab switching + this.modal.querySelectorAll('.c4ai-tab').forEach(tab => { + tab.addEventListener('click', (e) => { + const tabName = e.target.dataset.tab; + this.switchTab(tabName); + }); + }); + + // Wrap toggle + const wrapToggle = this.modal.querySelector('.c4ai-wrap-toggle'); + wrapToggle.addEventListener('click', () => { + const panes = this.modal.querySelectorAll('.c4ai-preview-pane'); + panes.forEach(pane => { + pane.classList.toggle('wrap'); + }); + wrapToggle.classList.toggle('active'); + }); + + // Options change + this.modal.querySelectorAll('input[type="checkbox"]').forEach(checkbox => { + checkbox.addEventListener('change', async (e) => { + this.markdownOptions[e.target.name] = e.target.checked; + + // Handle text-only mode dependencies + if (e.target.name === 'textOnly' && e.target.checked) { + const preserveLinksCheckbox = this.modal.querySelector('input[name="preserveLinks"]'); + if (preserveLinksCheckbox) { + preserveLinksCheckbox.checked = false; + preserveLinksCheckbox.disabled = true; + this.markdownOptions.preserveLinks = false; + } + + const includeImagesCheckbox = this.modal.querySelector('input[name="includeImages"]'); + if (includeImagesCheckbox) { + includeImagesCheckbox.disabled = true; + } + } else if (e.target.name === 'textOnly' && !e.target.checked) { + // Re-enable options when text-only is disabled + const preserveLinksCheckbox = 
this.modal.querySelector('input[name="preserveLinks"]'); + if (preserveLinksCheckbox) { + preserveLinksCheckbox.disabled = false; + } + + const includeImagesCheckbox = this.modal.querySelector('input[name="includeImages"]'); + if (includeImagesCheckbox) { + includeImagesCheckbox.disabled = false; + } + } + + // Update markdown content + await this.updateContent(); + }); + }); + + // Action buttons + this.modal.querySelector('.c4ai-copy-markdown-btn').addEventListener('click', () => { + this.copyToClipboard(); + }); + + this.modal.querySelector('.c4ai-download-btn').addEventListener('click', () => { + this.downloadMarkdown(); + }); + } + + switchTab(tabName) { + // Update active tab + this.modal.querySelectorAll('.c4ai-tab').forEach(tab => { + tab.classList.toggle('active', tab.dataset.tab === tabName); + }); + + // Update active pane + this.modal.querySelectorAll('.c4ai-preview-pane').forEach(pane => { + pane.classList.toggle('active', pane.dataset.pane === tabName); + }); + } + + async updateContent() { + if (!this.onGenerateMarkdown) return; + + try { + // Generate markdown with current options + this.currentMarkdown = await this.onGenerateMarkdown(this.markdownOptions); + + // Update markdown pane + const markdownPane = this.modal.querySelector('[data-pane="markdown"]'); + markdownPane.innerHTML = `
${this.escapeHtml(this.currentMarkdown)}
`; + + // Update preview pane + const previewPane = this.modal.querySelector('[data-pane="preview"]'); + + // Use marked.js if available + if (window.marked) { + marked.setOptions({ + gfm: true, + breaks: true, + tables: true, + headerIds: false, + mangle: false + }); + + const html = marked.parse(this.currentMarkdown); + previewPane.innerHTML = `
${html}
`; + } else { + // Fallback + previewPane.innerHTML = `
${this.escapeHtml(this.currentMarkdown)}
`; + } + } catch (error) { + console.error('Error generating markdown:', error); + this.showNotification('Error generating markdown', 'error'); + } + } + + async copyToClipboard() { + try { + await navigator.clipboard.writeText(this.currentMarkdown); + this.showNotification('Markdown copied to clipboard!'); + } catch (err) { + console.error('Failed to copy:', err); + this.showNotification('Failed to copy. Please try again.', 'error'); + } + } + + async downloadMarkdown() { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5); + const filename = `crawl4ai-export-${timestamp}.md`; + + // Create blob and download + const blob = new Blob([this.currentMarkdown], { type: 'text/markdown' }); + const url = URL.createObjectURL(blob); + + const a = document.createElement('a'); + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + + this.showNotification(`Downloaded ${filename}`); + } + + showNotification(message, type = 'success') { + const notification = document.createElement('div'); + notification.className = `c4ai-notification c4ai-notification-${type}`; + notification.textContent = message; + + document.body.appendChild(notification); + + // Animate in + setTimeout(() => notification.classList.add('show'), 10); + + // Remove after 3 seconds + setTimeout(() => { + notification.classList.remove('show'); + setTimeout(() => notification.remove(), 300); + }, 3000); + } + + escapeHtml(unsafe) { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + } + + // Get current options + getOptions() { + return { ...this.markdownOptions }; + } + + // Update options programmatically + setOptions(options) { + this.markdownOptions = { ...this.markdownOptions, ...options }; + + // Update checkboxes to reflect new options + Object.entries(options).forEach(([key, value]) => { + const checkbox = 
this.modal?.querySelector(`input[name="${key}"]`); + if (checkbox && typeof value === 'boolean') { + checkbox.checked = value; + } + }); + } + + // Cleanup + destroy() { + if (this.modal) { + this.modal.remove(); + this.modal = null; + } + this.onGenerateMarkdown = null; + } +} + +// Export for use in other scripts +if (typeof window !== 'undefined') { + window.MarkdownPreviewModal = MarkdownPreviewModal; +} \ No newline at end of file diff --git a/docs/md_v2/apps/crawl4ai-assistant/content/overlay.css b/docs/md_v2/apps/crawl4ai-assistant/content/overlay.css index af8235e9..56c1799a 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/content/overlay.css +++ b/docs/md_v2/apps/crawl4ai-assistant/content/overlay.css @@ -127,6 +127,7 @@ /* macOS-style titlebar */ .c4ai-toolbar-titlebar { + gap: 1em; display: flex; align-items: center; padding: 12px; diff --git a/docs/md_v2/apps/crawl4ai-assistant/content/schemaBuilder_v1.js b/docs/md_v2/apps/crawl4ai-assistant/content/schemaBuilder_v1.js deleted file mode 100644 index 0f0154a1..00000000 --- a/docs/md_v2/apps/crawl4ai-assistant/content/schemaBuilder_v1.js +++ /dev/null @@ -1,608 +0,0 @@ -// SchemaBuilder class for Crawl4AI Chrome Extension -class SchemaBuilder { - constructor() { - this.mode = null; - this.container = null; - this.fields = []; - this.overlay = null; - this.toolbar = null; - this.highlightBox = null; - this.selectedElements = new Set(); - this.isPaused = false; - this.codeModal = null; - - this.handleMouseMove = this.handleMouseMove.bind(this); - this.handleClick = this.handleClick.bind(this); - this.handleKeyPress = this.handleKeyPress.bind(this); - } - - start() { - this.mode = 'container'; - this.createOverlay(); - this.createToolbar(); - this.attachEventListeners(); - this.updateToolbar(); - } - - stop() { - this.detachEventListeners(); - this.overlay?.remove(); - this.toolbar?.remove(); - this.highlightBox?.remove(); - this.removeAllHighlights(); - this.mode = null; - this.container = null; - 
this.fields = []; - this.selectedElements.clear(); - } - - createOverlay() { - // Create highlight box - this.highlightBox = document.createElement('div'); - this.highlightBox.className = 'c4ai-highlight-box'; - document.body.appendChild(this.highlightBox); - } - - createToolbar() { - this.toolbar = document.createElement('div'); - this.toolbar.className = 'c4ai-toolbar'; - this.toolbar.innerHTML = ` -
-
- - - -
- Crawl4AI -
Crawl4AI Schema Builder
-
-
-
-
- Mode: - Select Container -
-
- Container: - Not selected -
-
- -
- Click on a container element (e.g., product card, article, etc.) -
-
- - -
-
- `; - document.body.appendChild(this.toolbar); - - // Add event listeners for toolbar buttons - document.getElementById('c4ai-pause').addEventListener('click', () => this.togglePause()); - document.getElementById('c4ai-generate').addEventListener('click', () => this.stopAndGenerate()); - document.getElementById('c4ai-close').addEventListener('click', () => this.stop()); - - // Make toolbar draggable - window.C4AI_Utils.makeDraggable(this.toolbar); - } - - attachEventListeners() { - document.addEventListener('mousemove', this.handleMouseMove, true); - document.addEventListener('click', this.handleClick, true); - document.addEventListener('keydown', this.handleKeyPress, true); - } - - detachEventListeners() { - document.removeEventListener('mousemove', this.handleMouseMove, true); - document.removeEventListener('click', this.handleClick, true); - document.removeEventListener('keydown', this.handleKeyPress, true); - } - - handleMouseMove(e) { - if (this.isPaused) return; - - const element = document.elementFromPoint(e.clientX, e.clientY); - if (element && !this.isOurElement(element)) { - this.highlightElement(element); - } - } - - handleClick(e) { - if (this.isPaused) return; - - const element = e.target; - - if (this.isOurElement(element)) { - return; - } - - e.preventDefault(); - e.stopPropagation(); - - if (this.mode === 'container') { - this.selectContainer(element); - } else if (this.mode === 'field') { - this.selectField(element); - } - } - - handleKeyPress(e) { - if (e.key === 'Escape') { - this.stop(); - } - } - - isOurElement(element) { - return window.C4AI_Utils.isOurElement(element); - } - - togglePause() { - this.isPaused = !this.isPaused; - const pauseBtn = document.getElementById('c4ai-pause'); - if (this.isPaused) { - pauseBtn.innerHTML = 'โ–ถ Resume'; - pauseBtn.classList.add('c4ai-paused'); - this.highlightBox.style.display = 'none'; - } else { - pauseBtn.innerHTML = 'โธ Pause'; - pauseBtn.classList.remove('c4ai-paused'); - } - } - - 
stopAndGenerate() { - if (!this.container || this.fields.length === 0) { - alert('Please select a container and at least one field before generating code.'); - return; - } - - const code = this.generateCode(); - this.showCodeModal(code); - } - - highlightElement(element) { - const rect = element.getBoundingClientRect(); - this.highlightBox.style.cssText = ` - left: ${rect.left + window.scrollX}px; - top: ${rect.top + window.scrollY}px; - width: ${rect.width}px; - height: ${rect.height}px; - display: block; - `; - - if (this.mode === 'container') { - this.highlightBox.className = 'c4ai-highlight-box c4ai-container-mode'; - } else { - this.highlightBox.className = 'c4ai-highlight-box c4ai-field-mode'; - } - } - - selectContainer(element) { - // Remove previous container highlight - if (this.container) { - this.container.element.classList.remove('c4ai-selected-container'); - } - - this.container = { - element: element, - html: element.outerHTML, - selector: this.generateSelector(element), - tagName: element.tagName.toLowerCase() - }; - - element.classList.add('c4ai-selected-container'); - this.mode = 'field'; - this.updateToolbar(); - this.updateStats(); - } - - selectField(element) { - // Don't select the container itself - if (element === this.container.element) { - return; - } - - // Check if already selected - if so, deselect it - if (this.selectedElements.has(element)) { - this.deselectField(element); - return; - } - - // Must be inside the container - if (!this.container.element.contains(element)) { - return; - } - - this.showFieldDialog(element); - } - - deselectField(element) { - // Remove from fields array - this.fields = this.fields.filter(f => f.element !== element); - - // Remove from selected elements set - this.selectedElements.delete(element); - - // Remove visual selection - element.classList.remove('c4ai-selected-field'); - - // Update UI - this.updateToolbar(); - this.updateStats(); - } - - showFieldDialog(element) { - const dialog = 
document.createElement('div'); - dialog.className = 'c4ai-field-dialog'; - - const rect = element.getBoundingClientRect(); - dialog.style.cssText = ` - left: ${rect.left + window.scrollX}px; - top: ${rect.bottom + window.scrollY + 10}px; - `; - - dialog.innerHTML = ` -
-

Name this field:

- -
- Content: ${element.textContent.trim().substring(0, 50)}... -
-
- - -
-
- `; - - document.body.appendChild(dialog); - - const input = dialog.querySelector('#c4ai-field-name'); - const saveBtn = dialog.querySelector('#c4ai-field-save'); - const cancelBtn = dialog.querySelector('#c4ai-field-cancel'); - - const save = () => { - const fieldName = input.value.trim(); - if (fieldName) { - this.fields.push({ - name: fieldName, - value: element.textContent.trim(), - element: element, - selector: this.generateSelector(element, this.container.element) - }); - - element.classList.add('c4ai-selected-field'); - this.selectedElements.add(element); - this.updateToolbar(); - this.updateStats(); - } - dialog.remove(); - }; - - const cancel = () => { - dialog.remove(); - }; - - saveBtn.addEventListener('click', save); - cancelBtn.addEventListener('click', cancel); - input.addEventListener('keypress', (e) => { - if (e.key === 'Enter') save(); - if (e.key === 'Escape') cancel(); - }); - - input.focus(); - } - - generateSelector(element, context = document) { - // Try to generate a robust selector - if (element.id) { - return `#${CSS.escape(element.id)}`; - } - - // Check for data attributes (most stable) - const dataAttrs = ['data-testid', 'data-id', 'data-test', 'data-cy']; - for (const attr of dataAttrs) { - const value = element.getAttribute(attr); - if (value) { - return `[${attr}="${value}"]`; - } - } - - // Check for aria-label - if (element.getAttribute('aria-label')) { - return `[aria-label="${element.getAttribute('aria-label')}"]`; - } - - // Try semantic HTML elements with text - const tagName = element.tagName.toLowerCase(); - if (['button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { - const text = element.textContent.trim(); - if (text && text.length < 50) { - // Use tag name with partial text match - return `${tagName}`; - } - } - - // Check for simple, non-utility classes - const classes = Array.from(element.classList) - .filter(c => !c.startsWith('c4ai-')) // Exclude our classes - .filter(c => !c.includes('[') && 
!c.includes('(') && !c.includes(':')) // Exclude utility classes - .filter(c => c.length < 30); // Exclude very long classes - - if (classes.length > 0 && classes.length <= 3) { - const selector = classes.map(c => `.${CSS.escape(c)}`).join(''); - try { - if (context.querySelectorAll(selector).length === 1) { - return selector; - } - } catch (e) { - // Invalid selector, continue - } - } - - // Use nth-child with simple parent tag - const parent = element.parentElement; - if (parent && parent !== context) { - const siblings = Array.from(parent.children); - const index = siblings.indexOf(element) + 1; - // Just use parent tag name to avoid recursion - const parentTag = parent.tagName.toLowerCase(); - return `${parentTag} > ${tagName}:nth-child(${index})`; - } - - // Final fallback - return tagName; - } - - updateToolbar() { - document.getElementById('c4ai-mode').textContent = - this.mode === 'container' ? 'Select Container' : 'Select Fields'; - - document.getElementById('c4ai-container').textContent = - this.container ? `${this.container.tagName} โœ“` : 'Not selected'; - - // Update fields list - const fieldsList = document.getElementById('c4ai-fields-list'); - const fieldsItems = document.getElementById('c4ai-fields-items'); - - if (this.fields.length > 0) { - fieldsList.style.display = 'block'; - fieldsItems.innerHTML = this.fields.map(field => ` -
  • - ${field.name} - ${field.value.substring(0, 30)}${field.value.length > 30 ? '...' : ''} -
  • - `).join(''); - } else { - fieldsList.style.display = 'none'; - } - - const hint = document.getElementById('c4ai-hint'); - if (this.mode === 'container') { - hint.textContent = 'Click on a container element (e.g., product card, article, etc.)'; - } else if (this.fields.length === 0) { - hint.textContent = 'Click on fields inside the container to extract (title, price, etc.)'; - } else { - hint.innerHTML = `Continue selecting fields or click Stop & Generate to finish.`; - } - } - - updateStats() { - chrome.runtime.sendMessage({ - action: 'updateStats', - stats: { - container: !!this.container, - fields: this.fields.length - } - }); - } - - removeAllHighlights() { - document.querySelectorAll('.c4ai-selected-container').forEach(el => { - el.classList.remove('c4ai-selected-container'); - }); - document.querySelectorAll('.c4ai-selected-field').forEach(el => { - el.classList.remove('c4ai-selected-field'); - }); - } - - generateCode() { - const fieldDescriptions = this.fields.map(f => - `- ${f.name} (example: "${f.value.substring(0, 50)}...")` - ).join('\n'); - - return `#!/usr/bin/env python3 -""" -Generated by Crawl4AI Chrome Extension -URL: ${window.location.href} -Generated: ${new Date().toISOString()} -""" - -import asyncio -import json -from pathlib import Path -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig -from crawl4ai.extraction_strategy import JsonCssExtractionStrategy - -# HTML snippet of the selected container element -HTML_SNIPPET = """ -${this.container.html} -""" - -# Extraction query based on your field selections -EXTRACTION_QUERY = """ -Create a JSON CSS extraction schema to extract the following fields: -${fieldDescriptions} - -The schema should handle multiple ${this.container.tagName} elements on the page. -Each item should be extracted as a separate object in the results array. 
-""" - -async def generate_schema(): - """Generate extraction schema using LLM""" - print("๐Ÿ”ง Generating extraction schema...") - - try: - # Generate the schema using Crawl4AI's built-in LLM integration - schema = JsonCssExtractionStrategy.generate_schema( - html=HTML_SNIPPET, - query=EXTRACTION_QUERY, - ) - - # Save the schema for reuse - schema_path = Path('generated_schema.json') - with open(schema_path, 'w') as f: - json.dump(schema, f, indent=2) - - print("โœ… Schema generated successfully!") - print(f"๐Ÿ“„ Schema saved to: {schema_path}") - print("\\nGenerated schema:") - print(json.dumps(schema, indent=2)) - - return schema - - except Exception as e: - print(f"โŒ Error generating schema: {e}") - return None - -async def test_extraction(url: str = "${window.location.href}"): - """Test the generated schema on the actual webpage""" - print("\\n๐Ÿงช Testing extraction on live webpage...") - - # Load the generated schema - try: - with open('generated_schema.json', 'r') as f: - schema = json.load(f) - except FileNotFoundError: - print("โŒ Schema file not found. 
Run generate_schema() first.") - return - - # Configure browser - browser_config = BrowserConfig( - headless=True, - verbose=False - ) - - # Configure extraction - crawler_config = CrawlerRunConfig( - extraction_strategy=JsonCssExtractionStrategy(schema=schema) - ) - - async with AsyncWebCrawler(config=browser_config) as crawler: - result = await crawler.arun( - url=url, - config=crawler_config - ) - - if result.success and result.extracted_content: - data = json.loads(result.extracted_content) - print(f"\\nโœ… Successfully extracted {len(data)} items!") - - # Save results - with open('extracted_data.json', 'w') as f: - json.dump(data, f, indent=2) - - # Show sample results - print("\\n๐Ÿ“Š Sample results (first 2 items):") - for i, item in enumerate(data[:2], 1): - print(f"\\nItem {i}:") - for key, value in item.items(): - print(f" {key}: {value}") - else: - print("โŒ Extraction failed:", result.error_message) - -if __name__ == "__main__": - # Step 1: Generate the schema from HTML snippet - asyncio.run(generate_schema()) - - # Step 2: Test extraction on the live webpage - # Uncomment the line below to test extraction: - # asyncio.run(test_extraction()) - - print("\\n๐ŸŽฏ Next steps:") - print("1. Review the generated schema in 'generated_schema.json'") - print("2. Uncomment the test_extraction() line to test on the live site") - print("3. Use the schema in your Crawl4AI projects!") -`; - - return code; - } - - showCodeModal(code) { - // Create modal - this.codeModal = document.createElement('div'); - this.codeModal.className = 'c4ai-code-modal'; - this.codeModal.innerHTML = ` -
    -
    -

    Generated Python Code

    - -
    -
    -
    ${window.C4AI_Utils.escapeHtml(code)}
    -
    - -
    - `; - - document.body.appendChild(this.codeModal); - - // Add event listeners - document.getElementById('c4ai-close-modal').addEventListener('click', () => { - this.codeModal.remove(); - this.codeModal = null; - // Don't stop the capture session - }); - - document.getElementById('c4ai-download-code').addEventListener('click', () => { - chrome.runtime.sendMessage({ - action: 'downloadCode', - code: code, - filename: `crawl4ai_schema_${Date.now()}.py` - }, (response) => { - if (response && response.success) { - const btn = document.getElementById('c4ai-download-code'); - const originalHTML = btn.innerHTML; - btn.innerHTML = 'โœ“ Downloaded!'; - setTimeout(() => { - btn.innerHTML = originalHTML; - }, 2000); - } else { - console.error('Download failed:', response?.error); - alert('Download failed. Please check your browser settings.'); - } - }); - }); - - document.getElementById('c4ai-copy-code').addEventListener('click', () => { - navigator.clipboard.writeText(code).then(() => { - const btn = document.getElementById('c4ai-copy-code'); - btn.innerHTML = 'โœ“ Copied!'; - setTimeout(() => { - btn.innerHTML = '๐Ÿ“‹ Copy to Clipboard'; - }, 2000); - }); - }); - - // Apply syntax highlighting - window.C4AI_Utils.applySyntaxHighlighting(this.codeModal.querySelector('.language-python')); - } -} \ No newline at end of file diff --git a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.0.0.zip b/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.0.0.zip deleted file mode 100644 index d8fec586..00000000 Binary files a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.0.0.zip and /dev/null differ diff --git a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.0.1.zip b/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.0.1.zip deleted file mode 100644 index bc782873..00000000 Binary files a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.0.1.zip and /dev/null differ diff --git 
a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.2.0.zip b/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.2.0.zip deleted file mode 100644 index 31730839..00000000 Binary files a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.2.0.zip and /dev/null differ diff --git a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.1.0.zip b/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.3.0.zip similarity index 53% rename from docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.1.0.zip rename to docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.3.0.zip index 191aebec..a04976ce 100644 Binary files a/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.1.0.zip and b/docs/md_v2/apps/crawl4ai-assistant/crawl4ai-assistant-v1.3.0.zip differ diff --git a/docs/md_v2/apps/crawl4ai-assistant/index.html b/docs/md_v2/apps/crawl4ai-assistant/index.html index a22c0365..abe082dc 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/index.html +++ b/docs/md_v2/apps/crawl4ai-assistant/index.html @@ -61,14 +61,14 @@

    Transform any website into structured data with just a few clicks! The Crawl4AI Assistant Chrome Extension provides three powerful tools for web scraping and data extraction.

    - 🎉 NEW: Schema Builder now extracts data INSTANTLY without any LLM! Test your schema and see JSON results immediately in the browser! + 🎉 NEW: Click2Crawl extracts data INSTANTLY without any LLM! Test your schema and see JSON results immediately in the browser!
    🎯 -

    Schema Builder

    -

    Extract data instantly without LLMs - see results in real-time!

    +

    Click2Crawl

    +

    Visual data extraction - click elements to build schemas instantly!

    🔴 @@ -77,8 +77,8 @@
    ๐Ÿ“ -

    Click2Crawl (New!)

    -

    Select multiple elements to extract clean markdown "as you see"

    +

    Markdown Extraction (New!)

    +

    Convert any webpage content to clean markdown with Visual Text Mode

    -
    -
    📊
    +
    +
    🎯
    -

    Schema Builder

    +

    Click2Crawl

    Visual data extraction

    Available
    @@ -154,11 +154,11 @@
    Alpha
    -
    +
    📝
    -

    Click2Crawl

    -

    Markdown extraction

    +

    Markdown Extraction

    +

    Content to markdown

    New!
    @@ -166,11 +166,11 @@
    - -
    + +
    -

    📊 Schema Builder

    - No LLM needed - Extract data instantly! +

    🎯 Click2Crawl

    + Click elements to build extraction schemas - No LLM needed!
    @@ -199,8 +199,8 @@
    3
    -

    Test & Extract Data NOW!

    -

    🎉 Click "Test Schema" to extract ALL matching data instantly - no coding required!

    +

    Test & Extract Data Instantly!

    +

    🎉 Click "Test Schema" to see extracted JSON immediately - no LLM or coding required!

    ⚡ See extracted JSON immediately
    @@ -210,11 +210,12 @@
    🚀 Zero LLM dependency
    -
    📊 Instant data extraction
    -
    🎯 Smart selector generation
    -
    🐍 Ready-to-run Python code
    -
    ✨ Preview matching elements
    -
    📥 Download JSON results
    +
    📊 Instant JSON extraction
    +
    🎯 Visual element selection
    +
    🐍 Export Python code
    +
    ✨ Live preview
    +
    📥 Download results
    +
    📝 Export to markdown
    @@ -268,11 +269,11 @@
    - -
    + +
    -

    ๐Ÿ“ Click2Crawl

    - Select multiple elements to extract clean markdown +

    ๐Ÿ“ Markdown Extraction

    + Convert webpage content to clean markdown "as you see"
    @@ -312,9 +313,9 @@
    Multi-select with Ctrl/Cmd
    -
    Visual Text Mode
    -
    Smart formatting
    -
    Cloud export (soon)
    +
    Visual Text Mode (As You See)
    +
    Clean markdown output
    +
    Export to Crawl4AI Cloud (soon)
    @@ -326,26 +327,26 @@

    See the Generated Code & Extracted Data

    - + - +
    - +
    - schema_extraction.py + click2crawl_extraction.py
    #!/usr/bin/env python3
     """
     🎉 NO LLM NEEDED! Direct extraction with CSS selectors
    -Generated by Crawl4AI Chrome Extension
    +Generated by Crawl4AI Chrome Extension - Click2Crawl
     """
     
     import asyncio
    @@ -353,7 +354,7 @@ Generated by Crawl4AI Chrome Extension
     from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
     from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
     
    -# The EXACT schema from your visual clicks - no guessing!
    +# The EXACT schema from Click2Crawl - no guessing!
     EXTRACTION_SCHEMA = {
         "name": "Product Catalog",
         "baseSelector": "div.product-card",  # The container you selected
    @@ -515,7 +516,7 @@ asyncio.run(automate_shopping())
    - +
    @@ -692,20 +693,20 @@ Today, finding a 24-hour restaurant in Manhattan requires genuine effort. The pa
    Direct -

    Get CrawlResult Without Code

    +

    Direct Data Download

    -

    Skip the code generation entirely! Get extracted data directly in the extension as a CrawlResult object, ready to download as JSON.

    +

    Skip the code generation entirely! Download extracted data directly from Click2Crawl as JSON or CSV files.

    - 📊 One-click extraction • No Python needed • Export to JSON/CSV + 📊 One-click download • No Python needed • Multiple export formats
    AI -

    Smart Schema Suggestions

    +

    Smart Field Detection

    -

    AI-powered field detection that automatically suggests the most likely data fields on any page, making schema building even faster.

    +

    AI-powered field detection for Click2Crawl that automatically suggests the most likely data fields on any page.

    🤖 Auto-detect fields • Smart naming • Pattern recognition
    @@ -758,7 +759,10 @@ Today, finding a 24-hour restaurant in Manhattan requires genuine effort. The pa // Show corresponding content const toolId = this.getAttribute('data-tool'); - document.getElementById(toolId).classList.add('active'); + const contentElement = document.getElementById(toolId); + if (contentElement) { + contentElement.classList.add('active'); + } }); }); diff --git a/docs/md_v2/apps/crawl4ai-assistant/manifest.json b/docs/md_v2/apps/crawl4ai-assistant/manifest.json index 663a7208..cbf144d7 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/manifest.json +++ b/docs/md_v2/apps/crawl4ai-assistant/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 3, "name": "Crawl4AI Assistant", - "version": "1.2.1", + "version": "1.3.0", "description": "Visual schema and script builder for Crawl4AI - Build extraction schemas and automation scripts by clicking and recording actions", "permissions": [ "activeTab", @@ -25,11 +25,12 @@ "js": [ "libs/marked.min.js", "content/shared/utils.js", - "content/schemaBuilder.js", + "content/markdownPreviewModal.js", + "content/click2crawl.js", "content/scriptBuilder.js", "content/contentAnalyzer.js", "content/markdownConverter.js", - "content/click2CrawlBuilder.js", + "content/markdownExtraction.js", "content/content.js" ], "css": ["content/overlay.css"], diff --git a/docs/md_v2/apps/crawl4ai-assistant/popup/popup.css b/docs/md_v2/apps/crawl4ai-assistant/popup/popup.css index 4cd75583..4ff8e1bb 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/popup/popup.css +++ b/docs/md_v2/apps/crawl4ai-assistant/popup/popup.css @@ -38,6 +38,8 @@ body { font-family: var(--font-primary); background: #0a0a0a; color: #e0e0e0; + border-radius: 16px; + overflow: hidden; } .popup-container { diff --git a/docs/md_v2/apps/crawl4ai-assistant/popup/popup.html b/docs/md_v2/apps/crawl4ai-assistant/popup/popup.html index f974f366..6a54f6e3 100644 --- a/docs/md_v2/apps/crawl4ai-assistant/popup/popup.html +++ 
b/docs/md_v2/apps/crawl4ai-assistant/popup/popup.html @@ -23,15 +23,15 @@
    @@ -72,10 +72,10 @@
    -
    +