Files
crawl4ai/docs/md/demo.md
unclecode d6182bedd7 chore:
- Add demo page to the new mkdocs
- Set website home page to mkdocs
2024-06-22 20:36:01 +08:00

8.7 KiB

Interactive Demo for Crawler

Enter URL and Options
Submit
<!-- Progress notice: the crawl form's submit handler sets this element to
     display:block when a request starts and to display:none when the request
     settles (success or failure). -->
<div id="loading" class="loading-message">
    <div class="terminal-alert terminal-alert-primary">Loading... Please wait.</div>
</div>

<!-- Result viewer. One panel per output of a crawl; showTab() displays the
     panel whose id is "tab-<name>" and hides the others. The <code>/<img>
     targets are filled in by the crawl form's submit handler. -->
<section id="response" class="response-section">
    <h2>Response</h2>
    <div class="tabs">
        <!-- NOTE(review): the tab items are click-only <li> elements and cannot
             be reached from the keyboard; consider real <button> elements once
             the stylesheet for .tab-item is confirmed to support them. -->
        <ul class="tab-list">
            <li class="tab-item" onclick="showTab('markdown')">Markdown</li>
            <li class="tab-item" onclick="showTab('cleanedHtml')">Cleaned HTML</li>
            <li class="tab-item" onclick="showTab('media')">Media</li>
            <li class="tab-item" onclick="showTab('extractedContent')">Extracted Content</li>
            <li class="tab-item" onclick="showTab('screenshot')">Screenshot</li>
            <li class="tab-item" onclick="showTab('pythonCode')">Python Code</li>
        </ul>

        <!-- Markdown panel: the only panel visible by default. -->
        <div class="tab-content" id="tab-markdown">
            <header>
                <div>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="copyToClipboard('markdownContent')">Copy</button>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="downloadContent('markdownContent', 'markdown.md')">Download</button>
                </div>
            </header>
            <pre><code id="markdownContent" class="language-markdown hljs"></code></pre>
        </div>

        <!-- Cleaned HTML panel. -->
        <div class="tab-content" id="tab-cleanedHtml" style="display: none;">
            <header>
                <div>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="copyToClipboard('cleanedHtmlContent')">Copy</button>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="downloadContent('cleanedHtmlContent', 'cleaned.html')">Download</button>
                </div>
            </header>
            <pre><code id="cleanedHtmlContent" class="language-html hljs"></code></pre>
        </div>

        <!-- Media panel: shows the media object serialised as JSON. -->
        <div class="tab-content" id="tab-media" style="display: none;">
            <header>
                <div>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="copyToClipboard('mediaContent')">Copy</button>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="downloadContent('mediaContent', 'media.json')">Download</button>
                </div>
            </header>
            <pre><code id="mediaContent" class="language-json hljs"></code></pre>
        </div>

        <!-- Extracted content panel. -->
        <div class="tab-content" id="tab-extractedContent" style="display: none;">
            <header>
                <div>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="copyToClipboard('extractedContentContent')">Copy</button>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="downloadContent('extractedContentContent', 'extracted_content.json')">Download</button>
                </div>
            </header>
            <pre><code id="extractedContentContent" class="language-json hljs"></code></pre>
        </div>

        <!-- Screenshot panel: the image source is set from the crawl response. -->
        <div class="tab-content" id="tab-screenshot" style="display: none;">
            <header>
                <div>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="downloadImage('screenshotContent', 'screenshot.png')">Download</button>
                </div>
            </header>
            <pre><img id="screenshotContent" alt="Screenshot of the crawled page"></pre>
        </div>

        <!-- Python code panel: an example snippet generated for the submitted options. -->
        <div class="tab-content" id="tab-pythonCode" style="display: none;">
            <header>
                <div>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="copyToClipboard('pythonCode')">Copy</button>
                    <button type="button" class="btn btn-default btn-ghost btn-sm" onclick="downloadContent('pythonCode', 'example.py')">Download</button>
                </div>
            </header>
            <pre><code id="pythonCode" class="language-python hljs"></code></pre>
        </div>
    </div>
</section>

<script>
    /**
     * Show the tab panel for `tabId` and hide every other panel.
     *
     * Panels are the elements with class `tab-content`; the panel belonging to
     * a tab has the id `tab-<tabId>`.
     *
     * @param {string} tabId - Suffix of the panel id, e.g. 'markdown'.
     */
    function showTab(tabId) {
        // Resolve the target before hiding anything: an unknown id must not
        // blank out every panel and then throw on a null element.
        const target = document.getElementById(`tab-${tabId}`);
        if (!target) {
            return;
        }
        document.querySelectorAll('.tab-content').forEach(panel => {
            panel.style.display = 'none';
        });
        target.style.display = 'block';
    }

    /**
     * Replace the text of a code element and run highlight.js over it again.
     *
     * @param {Element} codeBlock - The <code> element to refill.
     * @param {string} codeText - New source text; null/undefined is shown as empty.
     */
    function redo(codeBlock, codeText){
        // Clear the markers left by a previous highlight pass so the element
        // is treated as not-yet-highlighted.
        codeBlock.classList.remove('hljs');
        codeBlock.removeAttribute('data-highlighted');

        // Set new code and re-highlight
        codeBlock.textContent = codeText == null ? '' : codeText;

        // highlightBlock is the deprecated name of highlightElement; prefer the
        // current API and fall back only when the loaded build lacks it.
        if (typeof hljs.highlightElement === 'function') {
            hljs.highlightElement(codeBlock);
        } else {
            hljs.highlightBlock(codeBlock);
        }
    }

    /**
     * Copy the text content of an element to the system clipboard and report
     * the outcome with an alert.
     *
     * @param {string} elementId - Id of the element whose text is copied.
     */
    function copyToClipboard(elementId) {
        const content = document.getElementById(elementId).textContent;

        // navigator.clipboard is absent when the page is not served from a
        // secure context; report that instead of throwing a TypeError.
        if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
            alert('Copy failed: clipboard access is not available');
            return;
        }

        navigator.clipboard.writeText(content).then(() => {
            alert('Copied to clipboard');
        }).catch(error => {
            // The write can be rejected (e.g. permission denied); surface it
            // rather than leaving an unhandled rejection.
            alert('Copy failed: ' + error);
        });
    }

    /**
     * Offer the text content of an element as a file download.
     *
     * The text is wrapped in a plain-text Blob and handed to the browser via
     * a temporary, hidden anchor that is clicked programmatically.
     *
     * @param {string} elementId - Id of the element whose text is saved.
     * @param {string} filename - Suggested name for the downloaded file.
     */
    function downloadContent(elementId, filename) {
        const text = document.getElementById(elementId).textContent;
        const objectUrl = window.URL.createObjectURL(
            new Blob([text], { type: 'text/plain' })
        );

        // Temporary anchor: only exists in the document for the click.
        const link = document.createElement('a');
        link.style.display = 'none';
        link.href = objectUrl;
        link.download = filename;

        const host = document.body;
        host.appendChild(link);
        link.click();
        window.URL.revokeObjectURL(objectUrl);
        host.removeChild(link);
    }

    /**
     * Offer the image shown in an <img> element as a file download.
     *
     * @param {string} elementId - Id of the image element to save.
     * @param {string} filename - Suggested name for the downloaded file.
     */
    function downloadImage(elementId, filename) {
        const source = document.getElementById(elementId).src;

        // With no image loaded the source is empty; an anchor with an empty
        // href would point at the current page and download that instead.
        if (!source) {
            alert('No screenshot available to download');
            return;
        }

        // Temporary hidden anchor, clicked programmatically and then removed.
        const a = document.createElement('a');
        a.style.display = 'none';
        a.href = source;
        a.download = filename;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
    }

    document.getElementById('crawlForm').addEventListener('submit', function(event) {
        event.preventDefault();
        document.getElementById('loading').style.display = 'block';
        document.getElementById('response').style.display = 'none';

        const url = document.getElementById('url').value;
        const screenshot = document.getElementById('screenshot').checked;
        const data = {
            urls: [url],
            bypass_cache: false,
            word_count_threshold: 5,
            screenshot: screenshot
        };

        fetch('/crawl', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(data)
        })
        .then(response => response.json())
        .then(data => {
            data = data.results[0]; // Only one URL is requested
            document.getElementById('loading').style.display = 'none';
            document.getElementById('response').style.display = 'block';
            redo(document.getElementById('markdownContent'), data.markdown);
            redo(document.getElementById('cleanedHtmlContent'), data.cleaned_html);
            redo(document.getElementById('mediaContent'), JSON.stringify(data.media, null, 2));
            redo(document.getElementById('extractedContentContent'), data.extracted_content);
            if (screenshot) {
                document.getElementById('screenshotContent').src = `data:image/png;base64,${data.screenshot}`;
            }
            const pythonCode = `

from crawl4ai.web_crawler import WebCrawler

crawler = WebCrawler() crawler.warmup()

result = crawler.run( url='${url}', screenshot=${screenshot} ) print(result) `; redo(document.getElementById('pythonCode'), pythonCode); }) .catch(error => { document.getElementById('loading').style.display = 'none'; document.getElementById('response').style.display = 'block'; document.getElementById('markdownContent').textContent = 'Error: ' + error; }); }); </script>