Files
crawl4ai/docs/md_v2/apps/crawl4ai-assistant/content/click2CrawlBuilder.js
UncleCode 0ac12da9f3 feat: Major Chrome Extension overhaul with Click2Crawl, instant Schema extraction, and modular architecture
 New Features:
- Click2Crawl: Visual element selection with markdown conversion
  - Ctrl/Cmd+Click to select multiple elements
  - Visual text mode for WYSIWYG extraction
  - Real-time markdown preview with syntax highlighting
  - Export to .md file or clipboard

- Schema Builder Enhancement: Instant data extraction without LLMs
  - Test schemas directly in browser
  - See JSON results immediately
  - Export data or Python code
  - Cloud deployment ready (coming soon)

- Modular Architecture:
  - Separated into schemaBuilder.js, scriptBuilder.js, click2CrawlBuilder.js
  - Added contentAnalyzer.js and markdownConverter.js modules
  - Shared utilities and CSS reset system
  - Integrated marked.js for markdown rendering

🎨 UI/UX Improvements:
- Added edgy cloud announcement banner with seamless shimmer animation
- Direct, technical copy: "You don't need Puppeteer. You need Crawl4AI Cloud."
- Enhanced feature cards with emojis
- Fixed CSS conflicts with targeted reset approach
- Improved badge hover effects (red on hover)
- Added wrap toggle for code preview

📚 Documentation Updates:
- Split extraction diagrams into LLM and no-LLM versions
- Updated llms-full.txt with latest content
- Added versioned LLM context (v0.1.1)

🔧 Technical Enhancements:
- Refactored 3464 lines of monolithic content.js into modules
- Added proper event handling and cleanup
- Improved z-index management
- Better scroll position tracking for badges
- Enhanced error handling throughout

This release transforms the Chrome Extension from a simple tool into a powerful
visual data extraction suite, making web scraping accessible to everyone.
2025-06-09 23:18:27 +08:00

732 lines
24 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
class Click2CrawlBuilder {
constructor() {
this.selectedElements = new Set();
this.highlightBoxes = new Map();
this.selectionMode = false;
this.toolbar = null;
this.previewPanel = null;
this.selectionCounter = 0;
this.markdownConverter = null;
this.contentAnalyzer = null;
// Configuration options
this.options = {
includeImages: true,
preserveTables: true,
keepCodeFormatting: true,
simplifyLayout: false,
preserveLinks: true,
addSeparators: true,
includeXPath: false,
textOnly: false
};
this.init();
}
async init() {
// Initialize dependencies
this.markdownConverter = new MarkdownConverter();
this.contentAnalyzer = new ContentAnalyzer();
this.createToolbar();
this.setupEventListeners();
}
createToolbar() {
// Create floating toolbar
this.toolbar = document.createElement('div');
this.toolbar.className = 'c4ai-c2c-toolbar';
this.toolbar.innerHTML = `
<div class="c4ai-toolbar-header">
<div class="c4ai-toolbar-dots">
<span class="c4ai-dot c4ai-dot-red"></span>
<span class="c4ai-dot c4ai-dot-yellow"></span>
<span class="c4ai-dot c4ai-dot-green"></span>
</div>
<span class="c4ai-toolbar-title">Click2Crawl</span>
<button class="c4ai-close-btn" title="Close">×</button>
</div>
<div class="c4ai-toolbar-content">
<div class="c4ai-selection-info">
<span class="c4ai-selection-count">0 elements selected</span>
<button class="c4ai-clear-btn" title="Clear selection" disabled>Clear</button>
</div>
<div class="c4ai-toolbar-actions">
<button class="c4ai-preview-btn" disabled>Preview Markdown</button>
<button class="c4ai-copy-btn" disabled>Copy to Clipboard</button>
</div>
<div class="c4ai-toolbar-instructions">
<p>💡 <strong>Ctrl/Cmd + Click</strong> to select multiple elements</p>
<p>📝 Selected elements will be converted to clean markdown</p>
<p>⌨️ Press <strong>ESC</strong> to exit</p>
</div>
</div>
`;
document.body.appendChild(this.toolbar);
makeDraggableByHeader(this.toolbar);
// Position toolbar
this.toolbar.style.position = 'fixed';
this.toolbar.style.top = '20px';
this.toolbar.style.right = '20px';
this.toolbar.style.zIndex = '999999';
}
setupEventListeners() {
// Close button
this.toolbar.querySelector('.c4ai-close-btn').addEventListener('click', () => {
this.deactivate();
});
// Clear selection button
this.toolbar.querySelector('.c4ai-clear-btn').addEventListener('click', () => {
this.clearSelection();
});
// Preview button
this.toolbar.querySelector('.c4ai-preview-btn').addEventListener('click', () => {
this.showPreview();
});
// Copy button
this.toolbar.querySelector('.c4ai-copy-btn').addEventListener('click', () => {
this.copyToClipboard();
});
// Document click handler for element selection
this.documentClickHandler = (event) => this.handleElementClick(event);
document.addEventListener('click', this.documentClickHandler, true);
// Prevent default link behavior during selection mode
this.linkClickHandler = (event) => {
if (event.ctrlKey || event.metaKey) {
event.preventDefault();
event.stopPropagation();
}
};
document.addEventListener('click', this.linkClickHandler, true);
// Hover effect
this.documentHoverHandler = (event) => this.handleElementHover(event);
document.addEventListener('mouseover', this.documentHoverHandler, true);
// Remove hover on mouseout
this.documentMouseOutHandler = (event) => this.handleElementMouseOut(event);
document.addEventListener('mouseout', this.documentMouseOutHandler, true);
// Keyboard shortcuts
this.keyboardHandler = (event) => this.handleKeyboard(event);
document.addEventListener('keydown', this.keyboardHandler);
}
handleElementClick(event) {
// Check if Ctrl/Cmd is pressed
if (!event.ctrlKey && !event.metaKey) return;
// Prevent default behavior
event.preventDefault();
event.stopPropagation();
const element = event.target;
// Don't select our own UI elements
if (element.closest('.c4ai-c2c-toolbar') ||
element.closest('.c4ai-c2c-preview') ||
element.closest('.c4ai-highlight-box')) {
return;
}
// Toggle element selection
if (this.selectedElements.has(element)) {
this.deselectElement(element);
} else {
this.selectElement(element);
}
this.updateUI();
}
handleElementHover(event) {
const element = event.target;
// Don't hover our own UI elements
if (element.closest('.c4ai-c2c-toolbar') ||
element.closest('.c4ai-c2c-preview') ||
element.closest('.c4ai-highlight-box') ||
element.hasAttribute('data-c4ai-badge')) {
return;
}
// Add hover class
element.classList.add('c4ai-hover-candidate');
}
handleElementMouseOut(event) {
const element = event.target;
element.classList.remove('c4ai-hover-candidate');
}
handleKeyboard(event) {
// ESC to deactivate
if (event.key === 'Escape') {
this.deactivate();
}
// Ctrl/Cmd + A to select all visible elements
else if ((event.ctrlKey || event.metaKey) && event.key === 'a') {
event.preventDefault();
// Select all visible text-containing elements
const elements = document.querySelectorAll('p, h1, h2, h3, h4, h5, h6, li, td, th, div, span, article, section');
elements.forEach(el => {
if (el.textContent.trim() && this.isVisible(el) && !this.selectedElements.has(el)) {
this.selectElement(el);
}
});
this.updateUI();
}
}
isVisible(element) {
const rect = element.getBoundingClientRect();
const style = window.getComputedStyle(element);
return rect.width > 0 &&
rect.height > 0 &&
style.display !== 'none' &&
style.visibility !== 'hidden' &&
style.opacity !== '0';
}
selectElement(element) {
this.selectedElements.add(element);
// Create highlight box
const box = this.createHighlightBox(element);
this.highlightBoxes.set(element, box);
// Add selected class
element.classList.add('c4ai-selected');
this.selectionCounter++;
}
deselectElement(element) {
this.selectedElements.delete(element);
// Remove highlight box (badge)
const badge = this.highlightBoxes.get(element);
if (badge) {
// Remove scroll/resize listeners
if (badge._updatePosition) {
window.removeEventListener('scroll', badge._updatePosition, true);
window.removeEventListener('resize', badge._updatePosition);
}
badge.remove();
this.highlightBoxes.delete(element);
}
// Remove outline
element.style.outline = '';
element.style.outlineOffset = '';
// Remove attributes
element.removeAttribute('data-c4ai-selection-order');
element.classList.remove('c4ai-selected');
this.selectionCounter--;
}
createHighlightBox(element) {
// Add a data attribute to track selection order
element.setAttribute('data-c4ai-selection-order', this.selectionCounter + 1);
// Add selection outline directly to the element
element.style.outline = '2px solid #0fbbaa';
element.style.outlineOffset = '2px';
// Create badge with fixed positioning
const badge = document.createElement('div');
badge.className = 'c4ai-selection-badge-fixed';
badge.textContent = this.selectionCounter + 1;
badge.setAttribute('data-c4ai-badge', 'true');
badge.title = 'Click to deselect';
// Get element position and set badge position
const rect = element.getBoundingClientRect();
badge.style.cssText = `
position: fixed !important;
top: ${rect.top - 12}px !important;
left: ${rect.left - 12}px !important;
width: 24px !important;
height: 24px !important;
background: #0fbbaa !important;
color: #070708 !important;
border-radius: 50% !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
font-size: 12px !important;
font-weight: bold !important;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif !important;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3) !important;
z-index: 999998 !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
pointer-events: auto !important;
border: none !important;
padding: 0 !important;
margin: 0 !important;
line-height: 1 !important;
text-align: center !important;
text-decoration: none !important;
box-sizing: border-box !important;
`;
// Add hover styles dynamically
badge.addEventListener('mouseenter', () => {
badge.style.setProperty('background', '#ff3c74', 'important');
badge.style.setProperty('transform', 'scale(1.1)', 'important');
});
badge.addEventListener('mouseleave', () => {
badge.style.setProperty('background', '#0fbbaa', 'important');
badge.style.setProperty('transform', 'scale(1)', 'important');
});
// Add click handler to badge for deselection
badge.addEventListener('click', (e) => {
e.stopPropagation();
e.preventDefault();
this.deselectElement(element);
this.updateUI();
});
// Add scroll listener to update position
const updatePosition = () => {
const newRect = element.getBoundingClientRect();
badge.style.top = `${newRect.top - 12}px`;
badge.style.left = `${newRect.left - 12}px`;
};
// Store the update function so we can remove it later
badge._updatePosition = updatePosition;
window.addEventListener('scroll', updatePosition, true);
window.addEventListener('resize', updatePosition);
document.body.appendChild(badge);
return badge;
}
clearSelection() {
// Clear all selections
this.selectedElements.forEach(element => {
// Remove badge
const badge = this.highlightBoxes.get(element);
if (badge) {
// Remove scroll/resize listeners
if (badge._updatePosition) {
window.removeEventListener('scroll', badge._updatePosition, true);
window.removeEventListener('resize', badge._updatePosition);
}
badge.remove();
}
// Remove outline
element.style.outline = '';
element.style.outlineOffset = '';
// Remove attributes
element.removeAttribute('data-c4ai-selection-order');
element.classList.remove('c4ai-selected');
});
this.selectedElements.clear();
this.highlightBoxes.clear();
this.selectionCounter = 0;
this.updateUI();
}
updateUI() {
const count = this.selectedElements.size;
// Update selection count
this.toolbar.querySelector('.c4ai-selection-count').textContent =
`${count} element${count !== 1 ? 's' : ''} selected`;
// Enable/disable buttons
const hasSelection = count > 0;
this.toolbar.querySelector('.c4ai-preview-btn').disabled = !hasSelection;
this.toolbar.querySelector('.c4ai-copy-btn').disabled = !hasSelection;
this.toolbar.querySelector('.c4ai-clear-btn').disabled = !hasSelection;
}
async showPreview() {
// Generate markdown from selected elements
const markdown = await this.generateMarkdown();
// Create or update preview panel
if (!this.previewPanel) {
this.createPreviewPanel();
}
await this.updatePreviewContent(markdown);
this.previewPanel.style.display = 'block';
}
createPreviewPanel() {
this.previewPanel = document.createElement('div');
this.previewPanel.className = 'c4ai-c2c-preview';
this.previewPanel.innerHTML = `
<div class="c4ai-preview-header">
<div class="c4ai-toolbar-dots">
<span class="c4ai-dot c4ai-dot-red"></span>
<span class="c4ai-dot c4ai-dot-yellow"></span>
<span class="c4ai-dot c4ai-dot-green"></span>
</div>
<span class="c4ai-preview-title">Markdown Preview</span>
<button class="c4ai-preview-close">×</button>
</div>
<div class="c4ai-preview-options">
<label><input type="checkbox" name="textOnly"> 👁️ Visual Text Mode (As You See) TRY THIS!!!</label>
<label><input type="checkbox" name="includeImages" checked> Include Images</label>
<label><input type="checkbox" name="preserveTables" checked> Preserve Tables</label>
<label><input type="checkbox" name="preserveLinks" checked> Preserve Links</label>
<label><input type="checkbox" name="keepCodeFormatting" checked> Keep Code Formatting</label>
<label><input type="checkbox" name="simplifyLayout"> Simplify Layout</label>
<label><input type="checkbox" name="addSeparators" checked> Add Separators</label>
<label><input type="checkbox" name="includeXPath"> Include XPath Headers</label>
</div>
<div class="c4ai-preview-content">
<div class="c4ai-preview-tabs">
<button class="c4ai-tab active" data-tab="preview">Preview</button>
<button class="c4ai-tab" data-tab="markdown">Markdown</button>
<button class="c4ai-wrap-toggle" title="Toggle word wrap">↔️ Wrap</button>
</div>
<div class="c4ai-preview-pane active" data-pane="preview"></div>
<div class="c4ai-preview-pane" data-pane="markdown"></div>
</div>
<div class="c4ai-preview-actions">
<button class="c4ai-download-btn">Download .md</button>
<button class="c4ai-copy-markdown-btn">Copy Markdown</button>
<button class="c4ai-cloud-btn" disabled>Send to Cloud (Coming Soon)</button>
</div>
`;
document.body.appendChild(this.previewPanel);
makeDraggableByHeader(this.previewPanel);
// Position preview panel
this.previewPanel.style.position = 'fixed';
this.previewPanel.style.top = '50%';
this.previewPanel.style.left = '50%';
this.previewPanel.style.transform = 'translate(-50%, -50%)';
this.previewPanel.style.zIndex = '999999';
this.setupPreviewEventListeners();
}
setupPreviewEventListeners() {
// Close button
this.previewPanel.querySelector('.c4ai-preview-close').addEventListener('click', () => {
this.previewPanel.style.display = 'none';
});
// Tab switching
this.previewPanel.querySelectorAll('.c4ai-tab').forEach(tab => {
tab.addEventListener('click', (e) => {
const tabName = e.target.dataset.tab;
this.switchPreviewTab(tabName);
});
});
// Wrap toggle
const wrapToggle = this.previewPanel.querySelector('.c4ai-wrap-toggle');
wrapToggle.addEventListener('click', () => {
const panes = this.previewPanel.querySelectorAll('.c4ai-preview-pane');
panes.forEach(pane => {
pane.classList.toggle('wrap');
});
wrapToggle.classList.toggle('active');
});
// Options change
this.previewPanel.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
checkbox.addEventListener('change', async (e) => {
this.options[e.target.name] = e.target.checked;
// If text-only is enabled, automatically disable certain options
if (e.target.name === 'textOnly' && e.target.checked) {
// Update UI checkboxes
const preserveLinksCheckbox = this.previewPanel.querySelector('input[name="preserveLinks"]');
if (preserveLinksCheckbox) {
preserveLinksCheckbox.checked = false;
preserveLinksCheckbox.disabled = true;
}
// Optionally disable images in text-only mode
const includeImagesCheckbox = this.previewPanel.querySelector('input[name="includeImages"]');
if (includeImagesCheckbox) {
includeImagesCheckbox.disabled = true;
}
} else if (e.target.name === 'textOnly' && !e.target.checked) {
// Re-enable options when text-only is disabled
const preserveLinksCheckbox = this.previewPanel.querySelector('input[name="preserveLinks"]');
if (preserveLinksCheckbox) {
preserveLinksCheckbox.disabled = false;
}
const includeImagesCheckbox = this.previewPanel.querySelector('input[name="includeImages"]');
if (includeImagesCheckbox) {
includeImagesCheckbox.disabled = false;
}
}
const markdown = await this.generateMarkdown();
await this.updatePreviewContent(markdown);
});
});
// Action buttons
this.previewPanel.querySelector('.c4ai-copy-markdown-btn').addEventListener('click', () => {
this.copyToClipboard();
});
this.previewPanel.querySelector('.c4ai-download-btn').addEventListener('click', () => {
this.downloadMarkdown();
});
}
switchPreviewTab(tabName) {
// Update active tab
this.previewPanel.querySelectorAll('.c4ai-tab').forEach(tab => {
tab.classList.toggle('active', tab.dataset.tab === tabName);
});
// Update active pane
this.previewPanel.querySelectorAll('.c4ai-preview-pane').forEach(pane => {
pane.classList.toggle('active', pane.dataset.pane === tabName);
});
}
async updatePreviewContent(markdown) {
// Update markdown pane
const markdownPane = this.previewPanel.querySelector('[data-pane="markdown"]');
markdownPane.innerHTML = `<pre><code>${this.escapeHtml(markdown)}</code></pre>`;
// Update preview pane using marked.js
const previewPane = this.previewPanel.querySelector('[data-pane="preview"]');
// Configure marked options (marked.js is already loaded via manifest)
if (window.marked) {
marked.setOptions({
gfm: true,
breaks: true,
tables: true,
headerIds: false,
mangle: false
});
// Render markdown to HTML
const html = marked.parse(markdown);
previewPane.innerHTML = `<div class="c4ai-markdown-preview">${html}</div>`;
} else {
// Fallback if marked.js is not available
previewPane.innerHTML = `<div class="c4ai-markdown-preview"><pre>${this.escapeHtml(markdown)}</pre></div>`;
}
}
escapeHtml(unsafe) {
return unsafe
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&#039;");
}
async generateMarkdown() {
// Get selected elements as array
const elements = Array.from(this.selectedElements);
// Sort elements by their selection order
const sortedElements = elements.sort((a, b) => {
const orderA = parseInt(a.getAttribute('data-c4ai-selection-order') || '0');
const orderB = parseInt(b.getAttribute('data-c4ai-selection-order') || '0');
return orderA - orderB;
});
// Convert each element separately
const markdownParts = [];
for (let i = 0; i < sortedElements.length; i++) {
const element = sortedElements[i];
// Add XPath header if enabled
if (this.options.includeXPath) {
const xpath = this.getXPath(element);
markdownParts.push(`### Element ${i + 1} - XPath: \`${xpath}\`\n`);
}
// Check if element is part of a table structure that should be processed specially
let elementsToConvert = [element];
// If text-only mode and element is a TR, process the entire table for better context
if (this.options.textOnly && element.tagName === 'TR') {
const table = element.closest('table');
if (table && !sortedElements.includes(table)) {
// Only include this table row, not the whole table
elementsToConvert = [element];
}
}
// Analyze and convert individual element
const analysis = await this.contentAnalyzer.analyze(elementsToConvert);
const markdown = await this.markdownConverter.convert(elementsToConvert, {
...this.options,
analysis
});
markdownParts.push(markdown.trim());
// Add separator if enabled and not last element
if (this.options.addSeparators && i < sortedElements.length - 1) {
markdownParts.push('\n\n---\n\n');
}
}
return markdownParts.join('\n\n');
}
getXPath(element) {
if (element.id) {
return `//*[@id="${element.id}"]`;
}
const parts = [];
let current = element;
while (current && current.nodeType === Node.ELEMENT_NODE) {
let index = 0;
let sibling = current.previousSibling;
while (sibling) {
if (sibling.nodeType === Node.ELEMENT_NODE && sibling.nodeName === current.nodeName) {
index++;
}
sibling = sibling.previousSibling;
}
const tagName = current.nodeName.toLowerCase();
const part = index > 0 ? `${tagName}[${index + 1}]` : tagName;
parts.unshift(part);
current = current.parentNode;
}
return '/' + parts.join('/');
}
sortElementsByPosition(elements) {
return elements.sort((a, b) => {
const position = a.compareDocumentPosition(b);
if (position & Node.DOCUMENT_POSITION_FOLLOWING) {
return -1;
} else if (position & Node.DOCUMENT_POSITION_PRECEDING) {
return 1;
}
return 0;
});
}
async copyToClipboard() {
const markdown = await this.generateMarkdown();
try {
await navigator.clipboard.writeText(markdown);
this.showNotification('Markdown copied to clipboard!');
} catch (err) {
console.error('Failed to copy:', err);
this.showNotification('Failed to copy. Please try again.', 'error');
}
}
async downloadMarkdown() {
const markdown = await this.generateMarkdown();
const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
const filename = `crawl4ai-export-${timestamp}.md`;
// Create blob and download
const blob = new Blob([markdown], { type: 'text/markdown' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = filename;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
this.showNotification(`Downloaded ${filename}`);
}
showNotification(message, type = 'success') {
const notification = document.createElement('div');
notification.className = `c4ai-notification c4ai-notification-${type}`;
notification.textContent = message;
document.body.appendChild(notification);
// Animate in
setTimeout(() => notification.classList.add('show'), 10);
// Remove after 3 seconds
setTimeout(() => {
notification.classList.remove('show');
setTimeout(() => notification.remove(), 300);
}, 3000);
}
deactivate() {
// Remove event listeners
document.removeEventListener('click', this.documentClickHandler, true);
document.removeEventListener('click', this.linkClickHandler, true);
document.removeEventListener('mouseover', this.documentHoverHandler, true);
document.removeEventListener('mouseout', this.documentMouseOutHandler, true);
document.removeEventListener('keydown', this.keyboardHandler);
// Clear selections
this.clearSelection();
// Remove UI elements
if (this.toolbar) {
this.toolbar.remove();
this.toolbar = null;
}
if (this.previewPanel) {
this.previewPanel.remove();
this.previewPanel = null;
}
// Remove hover styles
document.querySelectorAll('.c4ai-hover-candidate').forEach(el => {
el.classList.remove('c4ai-hover-candidate');
});
// Notify background script (with error handling)
try {
if (chrome.runtime && chrome.runtime.sendMessage) {
chrome.runtime.sendMessage({
action: 'c2cDeactivated'
});
}
} catch (error) {
// Extension context might be invalidated, ignore the error
console.log('Click2Crawl deactivated (extension context unavailable)');
}
}
}