Files
crawl4ai/docs/md_v2/apps/llmtxt/llmtxt.js
UncleCode 40640badad feat: add Script Builder to Chrome Extension and reorganize LLM context files
This commit introduces significant enhancements to the Crawl4AI ecosystem:

  Chrome Extension - Script Builder (Alpha):
  - Add recording functionality to capture user interactions (clicks, typing, scrolling)
  - Implement smart event grouping for cleaner script generation
  - Support export to both JavaScript and C4A script formats
  - Add timeline view for visualizing and editing recorded actions
  - Include wait commands (time-based and element-based)
  - Add saved flows functionality for reusing automation scripts
  - Update UI with consistent dark terminal theme (Dank Mono font, green/pink accents)
  - Release new extension versions: v1.1.0, v1.2.0, v1.2.1

  LLM Context Builder Improvements:
  - Reorganize context files from llmtxt/ to llm.txt/ with better structure
  - Separate diagram templates from text content (diagrams/ and txt/ subdirectories)
  - Add comprehensive context files for all major Crawl4AI components
  - Improve file naming convention for better discoverability

  Documentation Updates:
  - Update apps index page to match main documentation theme
  - Standardize color scheme: "Available" tags use primary color (#50ffff)
  - Change "Coming Soon" tags to dark gray for better visual hierarchy
  - Add interactive two-column layout for extension landing page
  - Include code examples for both Schema Builder and Script Builder features

  Technical Improvements:
  - Enhance event capture mechanism with better element selection
  - Add support for contenteditable elements and complex form interactions
  - Implement proper scroll event handling for both window and element scrolling
  - Add meta key support for keyboard shortcuts
  - Improve selector generation for more reliable element targeting

  The Script Builder is released as Alpha, acknowledging potential bugs while providing
  early access to this powerful automation recording feature.
2025-06-08 22:02:12 +08:00

576 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Crawl4AI LLM Context Builder JavaScript
// Component definitions. Order matters: the selection table and the reference
// table are rendered by walking this array front to back. Each `id` is also
// the base name of the component's .txt asset.
const components = [
  { id: 'installation', name: 'Installation', description: 'Setup and installation options' },
  { id: 'simple_crawling', name: 'Simple Crawling', description: 'Basic web crawling operations' },
  { id: 'config_objects', name: 'Configuration Objects', description: 'Browser and crawler configuration' },
  { id: 'extraction', name: 'Data Extraction', description: 'Structured data extraction strategies' },
  { id: 'multi_urls_crawling', name: 'Multi URLs Crawling', description: 'Crawling multiple URLs efficiently' },
  { id: 'deep_crawling', name: 'Deep Crawling', description: 'Multi-page crawling strategies' },
  { id: 'docker', name: 'Docker', description: 'Docker deployment and configuration' },
  { id: 'cli', name: 'CLI', description: 'Command-line interface usage' },
  { id: 'http_based_crawler_strategy', name: 'HTTP-based Crawler', description: 'HTTP crawler strategy implementation' },
  { id: 'url_seeder', name: 'URL Seeder', description: 'URL seeding and discovery' },
  { id: 'deep_crawl_advanced_filters_scorers', name: 'Advanced Filters & Scorers', description: 'Deep crawl filtering and scoring' }
];
// Context types, in the order their columns are rendered for each component.
const contextTypes = ['memory', 'reasoning', 'examples'];

// Mutable page state shared by every handler in this file.
const state = {
  // Ids of the components whose master checkbox is ticked.
  selectedComponents: new Set(),
  // Component id -> Set of context types ticked for that component.
  selectedContextTypes: new Map(),
  // `${componentId}-${type}` -> estimated token count of that file.
  tokenCounts: new Map(),
};
// Initialize the application once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  // Build both tables and wire up the controls before touching the selection.
  renderComponents();
  renderReferenceTable();
  setupActionHandlers();
  setupColumnHeaderHandlers();

  // Pre-select the first component with the 'memory' and 'reasoning' context
  // types; 'examples' is left out of the initial selection.
  const [{ id: initialId }] = components;
  state.selectedComponents.add(initialId);
  state.selectedContextTypes.set(initialId, new Set(['memory', 'reasoning']));
  updateComponentUI();
});
/**
 * Estimate the token count of a text with a "words × 2.5" heuristic.
 *
 * @param {string} text - Text to measure; any falsy value counts as empty.
 * @returns {number} Rounded estimate; 0 for empty or whitespace-only text.
 */
function estimateTokens(text) {
  if (!text) return 0;
  const trimmed = text.trim();
  // ''.split(/\s+/) yields [''] (length 1), so whitespace-only input has to be
  // short-circuited; otherwise it would be reported as one word (3 tokens).
  if (trimmed === '') return 0;
  const wordCount = trimmed.split(/\s+/).length;
  return Math.round(wordCount * 2.5);
}
/**
 * Recompute the token total for the current selection and write it into the
 * `#total-tokens` element.
 *
 * Only context types of currently selected components are counted; files
 * without a recorded count contribute 0.
 */
function updateTotalTokenCount() {
  let sum = 0;
  for (const compId of state.selectedComponents) {
    const chosenTypes = state.selectedContextTypes.get(compId);
    if (!chosenTypes) continue;
    for (const type of chosenTypes) {
      sum += state.tokenCounts.get(`${compId}-${type}`) || 0;
    }
  }
  const totalEl = document.getElementById('total-tokens');
  totalEl.textContent = sum.toLocaleString();
}
/**
 * Rebuild the component selection table inside `#components-tbody` and start
 * loading the per-file token counts.
 */
function renderComponents() {
  const tableBody = document.getElementById('components-tbody');
  tableBody.innerHTML = '';
  for (const component of components) {
    tableBody.appendChild(createComponentRow(component));
  }
  // Started without awaiting: the counts are filled in when the fetches settle.
  fetchAllTokenCounts();
}
/**
 * Build the selection-table row for one component.
 *
 * The row holds a master checkbox, the component name, and one checkbox plus
 * token label for every entry of `contextTypes`. The 'examples' checkbox is
 * rendered disabled and therefore gets no change listener.
 *
 * @param {{id: string, name: string}} component - Entry of `components`.
 * @returns {HTMLTableRowElement} Detached row with its listeners attached.
 */
function createComponentRow(component) {
  const { id, name } = component;
  const row = document.createElement('tr');
  row.id = `component-${id}`;

  // Master checkbox cell.
  const masterCell = document.createElement('td');
  masterCell.innerHTML = `
<input type="checkbox" id="check-${id}"
data-component="${id}">
`;
  row.appendChild(masterCell);

  // Component name cell.
  const labelCell = document.createElement('td');
  labelCell.innerHTML = `<span class="component-name">${name}</span>`;
  row.appendChild(labelCell);

  // One cell per context type, showing the token count if it is already known.
  for (const type of contextTypes) {
    const cellKey = `${id}-${type}`;
    const knownTokens = state.tokenCounts.get(cellKey) || 0;
    const disabledAttr = type === 'examples' ? 'disabled' : '';
    const tokenLabel = knownTokens > 0 ? `${knownTokens.toLocaleString()} tokens` : '';
    const cell = document.createElement('td');
    cell.innerHTML = `
<input type="checkbox" id="check-${cellKey}"
data-component="${id}" data-type="${type}"
${disabledAttr}>
<span class="token-info" id="tokens-${cellKey}">
${tokenLabel}
</span>
`;
    row.appendChild(cell);
  }

  // Wire the listeners once all markup is in place.
  row.querySelector(`#check-${id}`).addEventListener('change', (event) => {
    handleComponentToggle(id, event.target.checked);
  });
  for (const type of contextTypes) {
    const typeBox = row.querySelector(`#check-${id}-${type}`);
    if (typeBox.disabled) continue;
    typeBox.addEventListener('change', (event) => {
      handleContextTypeToggle(id, type, event.target.checked);
    });
  }

  return row;
}
/**
 * React to a component's master checkbox being toggled.
 *
 * @param {string} componentId - Id of the toggled component.
 * @param {boolean} checked - New state of the master checkbox.
 */
function handleComponentToggle(componentId, checked) {
  if (checked) {
    state.selectedComponents.add(componentId);
    // Whether or not some types were already ticked, checking the component
    // resets its selection to 'memory' and 'reasoning'. 'examples' is left
    // out because its checkbox is rendered disabled.
    state.selectedContextTypes.set(componentId, new Set(['memory', 'reasoning']));
  } else {
    state.selectedComponents.delete(componentId);
    state.selectedContextTypes.delete(componentId);
  }
  updateComponentUI();
}
/**
 * Keep `state.selectedComponents` in step with a component's context types:
 * the component counts as selected exactly when at least one type is ticked.
 *
 * @param {string} componentId - Id of the component to re-evaluate.
 */
function updateComponentSelection(componentId) {
  const chosenTypes = state.selectedContextTypes.get(componentId);
  const hasAnyType = Boolean(chosenTypes) && chosenTypes.size > 0;
  if (!hasAnyType) {
    state.selectedComponents.delete(componentId);
    return;
  }
  state.selectedComponents.add(componentId);
}
/**
 * React to one context-type checkbox being toggled.
 *
 * @param {string} componentId - Component the checkbox belongs to.
 * @param {string} type - Context type of the checkbox.
 * @param {boolean} checked - New state of the checkbox.
 */
function handleContextTypeToggle(componentId, type, checked) {
  const { selectedContextTypes } = state;
  // Create the per-component set lazily on first use.
  if (!selectedContextTypes.has(componentId)) {
    selectedContextTypes.set(componentId, new Set());
  }
  const chosenTypes = selectedContextTypes.get(componentId);
  chosenTypes[checked ? 'add' : 'delete'](type);

  updateComponentSelection(componentId);
  updateComponentUI();
}
/**
 * Push the selection held in `state` into the rendered table: master
 * checkboxes, the row's 'disabled' class, and every context-type checkbox.
 * Components whose row is not in the document are skipped. Finishes by
 * refreshing the token total.
 */
function updateComponentUI() {
  for (const { id } of components) {
    const row = document.getElementById(`component-${id}`);
    if (!row) continue;

    const masterBox = row.querySelector(`#check-${id}`);
    const isSelected = state.selectedComponents.has(id);
    const chosenTypes = state.selectedContextTypes.get(id) || new Set();

    masterBox.checked = isSelected;
    // Rows of unselected components carry the 'disabled' class.
    row.classList.toggle('disabled', !isSelected);

    for (const type of contextTypes) {
      row.querySelector(`#check-${id}-${type}`).checked = chosenTypes.has(type);
    }
  }
  updateTotalTokenCount();
}
/**
 * Load the token count of every component/context-type file in parallel,
 * then refresh the selection table and re-render the reference table so both
 * show the new counts.
 *
 * @returns {Promise<void>} Settles after both refreshes have run.
 */
async function fetchAllTokenCounts() {
  const pending = components.flatMap((component) =>
    contextTypes.map((type) => fetchTokenCount(component.id, type))
  );
  await Promise.all(pending);
  updateComponentUI();
  renderReferenceTable(); // Picks up the freshly stored token counts
}
/**
 * Fetch one context file, estimate its token count, store the count in
 * `state.tokenCounts`, and update the file's label in the selection table.
 *
 * Failures never reject: they are logged with `console.warn`. A missing
 * 'examples' file is treated as expected, so it is recorded as 0 tokens with
 * an empty label and no warning.
 *
 * @param {string} componentId - Component whose file is fetched.
 * @param {string} type - Context type ('memory', 'reasoning' or 'examples').
 * @returns {Promise<void>}
 */
async function fetchTokenCount(componentId, type) {
  const key = `${componentId}-${type}`;

  // Write the label only if the matching cell is currently in the document.
  const setLabel = (label) => {
    const tokenSpan = document.getElementById(`tokens-${key}`);
    if (tokenSpan) {
      tokenSpan.textContent = label;
    }
  };

  try {
    const fileName = getFileName(componentId, type);
    const baseUrl = getBaseUrl(type);
    const response = await fetch(baseUrl + fileName);
    if (response.ok) {
      const tokens = estimateTokens(await response.text());
      state.tokenCounts.set(key, tokens);
      setLabel(`${tokens.toLocaleString()} tokens`);
    } else if (type === 'examples') {
      // Examples might not exist yet
      state.tokenCounts.set(key, 0);
      setLabel('');
    } else {
      // A missing memory/reasoning file is unexpected, so make it visible
      // instead of ignoring the response silently.
      console.warn(`Failed to fetch token count for ${key}: HTTP ${response.status}`);
    }
  } catch (error) {
    // Keep the underlying error in the log so the cause can be diagnosed.
    console.warn(`Failed to fetch token count for ${key}`, error);
    if (type === 'examples') {
      setLabel('');
    }
  }
}
/**
 * Name of the asset file that holds a component's context.
 *
 * @param {string} componentId - Component id; used as the file's base name.
 * @param {string} type - Context type. Not used: every type shares the same
 *   file name and only the directory differs.
 * @returns {string} `<componentId>.txt`
 */
function getFileName(componentId, type) {
  const extension = '.txt';
  return `${componentId}${extension}`;
}
/**
 * Directory URL (with trailing slash) that holds the files of a context type.
 *
 * Pages whose path contains '/apps/' use a '../../' relative prefix; all
 * other pages use the site root '/'.
 *
 * @param {string} type - 'memory', 'reasoning' or 'examples'; any other value
 *   falls back to the 'memory' directory.
 * @returns {string} Base URL to prepend to a file name.
 */
function getBaseUrl(type) {
  const isUnderApps = window.location.pathname.includes('/apps/');
  const assetsRoot = `${isUnderApps ? '../../' : '/'}assets/llm.txt/`;

  if (type === 'reasoning') {
    return `${assetsRoot}diagrams/`;
  }
  if (type === 'examples') {
    return `${assetsRoot}examples/`; // Will return 404 for now
  }
  // 'memory' and every unknown type share the plain-text directory.
  return `${assetsRoot}txt/`;
}
/**
 * Attach click handlers to the three action buttons: `#select-all`,
 * `#deselect-all` and `#download-btn`.
 */
function setupActionHandlers() {
  const onClick = (elementId, listener) => {
    document.getElementById(elementId).addEventListener('click', listener);
  };

  // Select every component with its 'memory' and 'reasoning' context types.
  onClick('select-all', () => {
    for (const { id } of components) {
      state.selectedComponents.add(id);
      state.selectedContextTypes.set(id, new Set(['memory', 'reasoning']));
    }
    updateComponentUI();
  });

  // Drop the whole selection.
  onClick('deselect-all', () => {
    state.selectedComponents.clear();
    state.selectedContextTypes.clear();
    updateComponentUI();
  });

  // Download button
  onClick('download-btn', handleDownload);
}
/**
 * Make every `.clickable-header` element toggle its whole column on click.
 * The column's context type is read from the header's `data-type` attribute
 * at click time.
 */
function setupColumnHeaderHandlers() {
  for (const header of document.querySelectorAll('.clickable-header')) {
    header.addEventListener('click', () => {
      toggleColumnSelection(header.getAttribute('data-type'));
    });
  }
}
/**
 * Toggle one context type for every component at once.
 *
 * If every component already has the type ticked, it is removed everywhere;
 * otherwise it is added everywhere. The 'examples' column is never toggled.
 *
 * @param {string} type - Context type of the clicked column.
 */
function toggleColumnSelection(type) {
  // Don't toggle examples column
  if (type === 'examples') return;

  const { selectedContextTypes } = state;
  const everyRowTicked = components.every(({ id }) => {
    const chosenTypes = selectedContextTypes.get(id);
    return Boolean(chosenTypes) && chosenTypes.has(type);
  });

  for (const { id } of components) {
    if (!selectedContextTypes.has(id)) {
      selectedContextTypes.set(id, new Set());
    }
    const chosenTypes = selectedContextTypes.get(id);
    if (everyRowTicked) {
      chosenTypes.delete(type);
    } else {
      chosenTypes.add(type);
    }
    // Keep the master selection in step with the per-type change.
    updateComponentSelection(id);
  }
  updateComponentUI();
}
/**
 * Click handler of the download button: fetch every selected file, merge the
 * contents into one Markdown document, and trigger its download.
 *
 * Progress and errors are reported through the `#status` element. A success
 * message is cleared again after three seconds. Errors are shown, not thrown.
 *
 * @returns {Promise<void>}
 */
async function handleDownload() {
  const statusEl = document.getElementById('status');
  // Update the message and, when a class is given, the status styling too.
  const showStatus = (message, cssClass) => {
    statusEl.textContent = message;
    if (cssClass !== undefined) {
      statusEl.className = cssClass;
    }
  };

  showStatus('Preparing context files...', 'status loading');
  try {
    const selectedFiles = getSelectedFiles();
    if (selectedFiles.length === 0) {
      throw new Error('No files selected. Please select at least one component or preset.');
    }
    showStatus(`Fetching ${selectedFiles.length} files...`);
    const fetched = await fetchFiles(selectedFiles);
    const markdown = combineContents(fetched);
    downloadFile(markdown, 'crawl4ai_custom_context.md');
    showStatus('Download complete!', 'status success');
    setTimeout(() => {
      showStatus('', 'status');
    }, 3000);
  } catch (error) {
    showStatus(`Error: ${error.message}`, 'status error');
  }
}
/**
 * Turn the current selection into a flat list of files to fetch.
 *
 * Files come out grouped by component in the iteration order of
 * `state.selectedComponents`, and by context type within each component.
 * Selected components without a type set contribute nothing.
 *
 * @returns {Array<{componentId: string, type: string, fileName: string, baseUrl: string}>}
 */
function getSelectedFiles() {
  const selection = [];
  for (const componentId of state.selectedComponents) {
    const chosenTypes = state.selectedContextTypes.get(componentId);
    if (!chosenTypes) continue;
    for (const type of chosenTypes) {
      selection.push({
        componentId,
        type,
        fileName: getFileName(componentId, type),
        baseUrl: getBaseUrl(type)
      });
    }
  }
  return selection;
}
/**
 * Fetch all given files in parallel.
 *
 * A failed fetch never rejects. The file's content is replaced by an HTML
 * comment placeholder and the problem is logged with `console.warn`. For
 * 'examples' files a "coming soon" placeholder is used and nothing is logged.
 *
 * @param {Array<{componentId: string, type: string, fileName: string, baseUrl: string}>} fileInfos
 *   Files to load, as produced by `getSelectedFiles`.
 * @returns {Promise<Array<{fileInfo: object, content: string}>>} One entry per
 *   input, in input order.
 */
async function fetchFiles(fileInfos) {
  // Placeholder used for any 'examples' file that cannot be loaded.
  const examplesPlaceholder = (fileInfo) => ({
    fileInfo,
    content: `<!-- Examples for ${fileInfo.componentId} coming soon -->\n\nExamples are currently being developed for this component.`
  });

  const loadOne = async (fileInfo) => {
    try {
      const url = fileInfo.baseUrl + fileInfo.fileName;
      const response = await fetch(url);
      if (response.ok) {
        return { fileInfo, content: await response.text() };
      }
      if (fileInfo.type === 'examples') {
        return examplesPlaceholder(fileInfo);
      }
      console.warn(`Failed to fetch ${fileInfo.fileName} from ${url}`);
      return { fileInfo, content: `<!-- Failed to load ${fileInfo.fileName} -->` };
    } catch (error) {
      if (fileInfo.type === 'examples') {
        return examplesPlaceholder(fileInfo);
      }
      console.warn(`Error fetching ${fileInfo.fileName}:`, error);
      return { fileInfo, content: `<!-- Error loading ${fileInfo.fileName} -->` };
    }
  };

  return Promise.all(fileInfos.map((fileInfo) => loadOne(fileInfo)));
}
/**
 * Merge fetched files into one Markdown document.
 *
 * The document starts with a header block (timestamp, file count, total
 * estimated tokens) followed by one section per file. Each section carries
 * the component name, or the raw id when the component is unknown, plus the
 * context type and the file's own token estimate.
 *
 * @param {Array<{fileInfo: {componentId: string, type: string}, content: string}>} fileContents
 *   Entries as returned by `fetchFiles`.
 * @returns {string} The combined Markdown text.
 */
function combineContents(fileContents) {
  // Estimate each file once; the values feed both the total and the sections.
  const tokensPerFile = fileContents.map(({ content }) => estimateTokens(content));
  const totalTokens = tokensPerFile.reduce((sum, count) => sum + count, 0);

  const header = [
    '# Crawl4AI Custom LLM Context',
    `Generated on: ${new Date().toISOString()}`,
    `Total files: ${fileContents.length}`,
    `Estimated tokens: ${totalTokens.toLocaleString()}`,
    '---',
    ''
  ].join('\n');

  const sections = fileContents.map(({ fileInfo, content }, index) => {
    const known = components.find((candidate) => candidate.id === fileInfo.componentId);
    const title = known ? known.name : fileInfo.componentId;
    return [
      `## ${title} - ${getContextTypeName(fileInfo.type)}`,
      `Component ID: ${fileInfo.componentId}`,
      `Context Type: ${fileInfo.type}`,
      `Estimated tokens: ${tokensPerFile[index].toLocaleString()}`,
      `${content}`,
      '---',
      ''
    ].join('\n');
  });

  return header + sections.join('\n');
}
/**
 * Human-readable label of a context type, used in section headings of the
 * combined download.
 *
 * @param {string} type - Context type key.
 * @returns {string} Display name; unknown types are returned unchanged.
 */
function getContextTypeName(type) {
  if (type === 'memory') {
    return 'Full Content';
  }
  if (type === 'reasoning') {
    return 'Diagrams & Workflows';
  }
  if (type === 'examples') {
    return 'Code Examples';
  }
  return type;
}
/**
 * Offer `content` to the user as a downloadable Markdown file.
 *
 * Creates an object URL for the content, clicks a temporary `<a download>`
 * element that points at it, then removes the element and revokes the URL.
 *
 * @param {string} content - Text to save.
 * @param {string} fileName - Suggested name of the downloaded file.
 */
function downloadFile(content, fileName) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: 'text/markdown' }));
  const link = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: fileName
  });

  // The anchor is attached to the page only for the duration of the click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  URL.revokeObjectURL(objectUrl);
}
/**
 * Rebuild the reference table inside `#reference-table-body`.
 *
 * Each component gets one row with links to its memory and reasoning files,
 * each followed by a token badge when a count is known. The examples link is
 * shown only when its token count is positive, otherwise the cell holds '-'.
 * The fifth cell is always '-'.
 */
function renderReferenceTable() {
  const tableBody = document.getElementById('reference-table-body');
  tableBody.innerHTML = '';

  // Same prefix rule as the fetch URLs: relative under '/apps/', root otherwise.
  const isUnderApps = window.location.pathname.includes('/apps/');
  const assetsRoot = `${isUnderApps ? '../../' : '/'}assets/llm.txt/`;

  // Token badge markup, or nothing when no positive count is known.
  const badge = (count) =>
    count > 0 ? `<span class="file-size">${count.toLocaleString()} tokens</span>` : '';

  for (const { id, name } of components) {
    const countFor = (type) => state.tokenCounts.get(`${id}-${type}`) || 0;
    const examplesCount = countFor('examples');
    const examplesCell = examplesCount > 0
      ? `<a href="${assetsRoot}examples/${id}.txt" class="file-link" target="_blank">Examples</a>\n${badge(examplesCount)}`
      : '-';

    const row = document.createElement('tr');
    row.innerHTML = `
<td><strong>${name}</strong></td>
<td>
<a href="${assetsRoot}txt/${id}.txt" class="file-link" target="_blank">Memory</a>
${badge(countFor('memory'))}
</td>
<td>
<a href="${assetsRoot}diagrams/${id}.txt" class="file-link" target="_blank">Reasoning</a>
${badge(countFor('reasoning'))}
</td>
<td>
${examplesCell}
</td>
<td>-</td>
`;
    tableBody.appendChild(row);
  }
}