diff --git a/docs/md_v2/assets/layout.css b/docs/md_v2/assets/layout.css index 0da340fa..044c272b 100644 --- a/docs/md_v2/assets/layout.css +++ b/docs/md_v2/assets/layout.css @@ -412,17 +412,41 @@ footer { background-color: var(--primary-dimmed-color, #09b5a5); color: var(--background-color, #070708); border: none; - padding: 4px 8px; + padding: 6px 10px; font-size: 0.8em; border-radius: 4px; cursor: pointer; - box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3); - transition: background-color 0.2s ease; + box-shadow: 0 3px 8px rgba(0, 0, 0, 0.3); + transition: background-color 0.2s ease, transform 0.15s ease; white-space: nowrap; + display: flex; + align-items: center; + font-weight: 500; + animation: askAiButtonAppear 0.2s ease-out; +} + +@keyframes askAiButtonAppear { + from { + opacity: 0; + transform: scale(0.9); + } + to { + opacity: 1; + transform: scale(1); + } } .ask-ai-selection-button:hover { background-color: var(--primary-color, #50ffff); + transform: scale(1.05); +} + +/* Mobile styles for Ask AI button */ +@media screen and (max-width: 768px) { + .ask-ai-selection-button { + padding: 8px 12px; /* Larger touch target on mobile */ + font-size: 0.9em; /* Slightly larger text */ + } } /* ==== File: docs/assets/layout.css (Additions) ==== */ diff --git a/docs/md_v2/assets/selection_ask_ai.js b/docs/md_v2/assets/selection_ask_ai.js index b5cb471d..e88ad34e 100644 --- a/docs/md_v2/assets/selection_ask_ai.js +++ b/docs/md_v2/assets/selection_ask_ai.js @@ -8,12 +8,32 @@ document.addEventListener('DOMContentLoaded', () => { const button = document.createElement('button'); button.id = 'ask-ai-selection-btn'; button.className = 'ask-ai-selection-button'; - button.textContent = 'Ask AI'; // Or use an icon + + // Add icon and text for better visibility + button.innerHTML = ` + + + + Ask AI + `; + + // Common styles button.style.display = 'none'; // Initially hidden button.style.position = 'absolute'; button.style.zIndex = '1500'; // Ensure it's on top + 
button.style.boxShadow = '0 3px 8px rgba(0, 0, 0, 0.4)'; // More pronounced shadow + button.style.transition = 'transform 0.15s ease, background-color 0.2s ease'; // Smooth hover effect + + // Add transform on hover + button.addEventListener('mouseover', () => { + button.style.transform = 'scale(1.05)'; + }); + + button.addEventListener('mouseout', () => { + button.style.transform = 'scale(1)'; + }); + document.body.appendChild(button); - button.addEventListener('click', handleAskAiClick); return button; } @@ -43,11 +63,38 @@ document.addEventListener('DOMContentLoaded', () => { const range = selection.getRangeAt(0); const rect = range.getBoundingClientRect(); - // Calculate position: top-right of the selection + // Get viewport dimensions + const viewportWidth = window.innerWidth; + const viewportHeight = window.innerHeight; + + // Calculate position based on selection const scrollX = window.scrollX; const scrollY = window.scrollY; - const buttonTop = rect.top + scrollY - askAiButton.offsetHeight - 5; // 5px above - const buttonLeft = rect.right + scrollX + 5; // 5px to the right + + // Default position (top-right of selection) + let buttonTop = rect.top + scrollY - askAiButton.offsetHeight - 5; // 5px above + let buttonLeft = rect.right + scrollX + 5; // 5px to the right + + // Check if we're on mobile (which we define as less than 768px) + const isMobile = viewportWidth <= 768; + + if (isMobile) { + // On mobile, position centered above selection to avoid edge issues + buttonTop = rect.top + scrollY - askAiButton.offsetHeight - 10; // 10px above on mobile + buttonLeft = rect.left + scrollX + (rect.width / 2) - (askAiButton.offsetWidth / 2); // Centered + } else { + // For desktop, ensure the button doesn't go off screen + // Check right edge + if (buttonLeft + askAiButton.offsetWidth > scrollX + viewportWidth) { + buttonLeft = scrollX + viewportWidth - askAiButton.offsetWidth - 10; // 10px from right edge + } + } + + // Check top edge (for all devices) + if 
(buttonTop < scrollY) { + // If would go above viewport, position below selection instead + buttonTop = rect.bottom + scrollY + 5; // 5px below + } askAiButton.style.top = `${buttonTop}px`; askAiButton.style.left = `${buttonLeft}px`; @@ -77,8 +124,8 @@ document.addEventListener('DOMContentLoaded', () => { // --- Event Listeners --- - // Show button on mouse up after selection - document.addEventListener('mouseup', (event) => { + // Function to handle selection events (both mouse and touch) + function handleSelectionEvent(event) { // Slight delay to ensure selection is registered setTimeout(() => { const selectedText = getSafeSelectedText(); @@ -86,7 +133,7 @@ document.addEventListener('DOMContentLoaded', () => { if (!askAiButton) { askAiButton = createAskAiButton(); } - // Don't position if the click was ON the button itself + // Don't position if the event was ON the button itself if (event.target !== askAiButton) { positionButton(event); } @@ -94,16 +141,46 @@ document.addEventListener('DOMContentLoaded', () => { hideButton(); } }, 10); // Small delay + } + + // Mouse selection events (desktop) + document.addEventListener('mouseup', handleSelectionEvent); + + // Touch selection events (mobile) + document.addEventListener('touchend', handleSelectionEvent); + document.addEventListener('selectionchange', () => { + // This helps with mobile selection which can happen without mouseup/touchend + setTimeout(() => { + const selectedText = getSafeSelectedText(); + if (selectedText && askAiButton) { + positionButton(); + } + }, 300); // Longer delay for selection change }); - // Hide button on scroll or click elsewhere + // Hide button on various events document.addEventListener('mousedown', (event) => { // Hide if clicking anywhere EXCEPT the button itself if (askAiButton && event.target !== askAiButton) { hideButton(); } }); + + document.addEventListener('touchstart', (event) => { + // Same for touch events, but only hide if not on the button + if (askAiButton && 
event.target !== askAiButton) { + hideButton(); + } + }); + document.addEventListener('scroll', hideButton, true); // Capture scroll events + + // Also hide when pressing Escape key + document.addEventListener('keydown', (event) => { + if (event.key === 'Escape') { + hideButton(); + } + }); console.log("Selection Ask AI script loaded."); }); \ No newline at end of file diff --git a/docs/md_v2/blog/index.md b/docs/md_v2/blog/index.md index 1eed43d9..55532fce 100644 --- a/docs/md_v2/blog/index.md +++ b/docs/md_v2/blog/index.md @@ -4,6 +4,32 @@ Welcome to the Crawl4AI blog! Here you'll find detailed release notes, technical ## Latest Release + + +--- + +### [Crawl4AI v0.6.0 – World-Aware Crawling, Pre-Warmed Browsers, and the MCP API](releases/0.6.0.md) +*April 23, 2025* + +Crawl4AI v0.6.0 is our most powerful release yet. This update brings major architectural upgrades including world-aware crawling (set geolocation, locale, and timezone), real-time traffic capture, and a memory-efficient crawler pool with pre-warmed pages. + +The Docker server now exposes a full-featured MCP socket + SSE interface, supports streaming, and comes with a new Playground UI. Plus, table extraction is now native, and the new stress-test framework supports crawling 1,000+ URLs. 
+ +Other key changes: + +* Native support for `result.media["tables"]` to export DataFrames +* Full network + console logs and MHTML snapshot per crawl +* Browser pooling and pre-warming for faster cold starts +* New streaming endpoints via MCP API and Playground +* Robots.txt support, proxy rotation, and improved session handling +* Deprecated old markdown names, legacy modules cleaned up +* Massive repo cleanup: ~36K insertions, ~5K deletions across 121 files + +[Read full release notes →](releases/0.6.0.md) + +--- + + ### [Crawl4AI v0.5.0: Deep Crawling, Scalability, and a New CLI!](releases/0.5.0.md) diff --git a/docs/md_v2/blog/releases/0.6.0.md b/docs/md_v2/blog/releases/0.6.0.md index 2e5bb63c..a3a7c216 100644 --- a/docs/md_v2/blog/releases/0.6.0.md +++ b/docs/md_v2/blog/releases/0.6.0.md @@ -1,51 +1,143 @@ -# Crawl4AI 0.6.0 +# Crawl4AI v0.6.0 Release Notes -*Release date: 2025‑04‑22* - -0.6.0 is the **biggest jump** since the 0.5 series, packing a smarter browser core, pool‑based crawlers, and a ton of DX candy. Expect faster runs, lower RAM burn, and richer diagnostics. +We're excited to announce the release of **Crawl4AI v0.6.0**, our biggest and most feature-rich update yet. This version introduces major architectural upgrades, brand-new capabilities for geo-aware crawling, high-efficiency scraping, and real-time streaming support for scalable deployments. 
--- -## 🚀 Key upgrades +## Highlights -| Area | What changed | -|------|--------------| -| **Browser** | New **Browser** management with pooling, page pre‑warm, geolocation + locale + timezone switches | -| **Crawler** | Console and network log capture, MHTML snapshots, safer `get_page` API | -| **Server & API** | **Crawler Pool Manager** endpoint, MCP socket + SSE support | -| **Docs** | v2 layout, floating Ask‑AI helper, GitHub stats badge, copy‑code buttons, Docker API demo | -| **Tests** | Memory + load benchmarks, 90+ new cases covering MCP and Docker | +### 1. **World-Aware Crawlers** +Crawl as if you’re anywhere in the world. With v0.6.0, each crawl can simulate: +- Specific GPS coordinates +- Browser locale +- Timezone + +Example: +```python +CrawlerRunConfig( + url="https://browserleaks.com/geo", + locale="en-US", + timezone_id="America/Los_Angeles", + geolocation=GeolocationConfig( + latitude=34.0522, + longitude=-118.2437, + accuracy=10.0 + ) +) +``` +Great for accessing region-specific content or testing global behavior. --- -## ⚠️ Breaking changes +### 2. **Native Table Extraction** +Extract HTML tables directly into usable formats like Pandas DataFrames or CSV with zero parsing hassle. All table data is available under `result.media["tables"]`. -1. **`get_page` signature** – returns `(html, metadata)` instead of plain html. -2. **Docker** – new Chromium base layer, rebuild images. +Example: +```python +raw_df = pd.DataFrame( + result.media["tables"][0]["rows"], + columns=result.media["tables"][0]["headers"] +) +``` +This makes it ideal for scraping financial data, pricing pages, or anything tabular. --- -## How to upgrade +### 3. **Browser Pooling & Pre-Warming** +We've overhauled browser management. 
Now, multiple browser instances can be pooled and pages pre-warmed for ultra-fast launches: +- Reduces cold-start latency +- Lowers memory spikes +- Enhances parallel crawling stability +This powers the new **Docker Playground** experience and streamlines heavy-load crawling. + +--- + +### 4. **Traffic & Snapshot Capture** +Need full visibility? You can now capture: +- Full network traffic logs +- Console output +- MHTML page snapshots for post-crawl audits and debugging + +No more guesswork on what happened during your crawl. + +--- + +### 5. **MCP API and Streaming Support** +We’re exposing **MCP socket and SSE endpoints**, allowing: +- Live streaming of crawl results +- Real-time integration with agents or frontends +- A new Playground UI for interactive crawling + +This is a major step towards making Crawl4AI real-time ready. + +--- + +### 6. **Stress-Test Framework** +Want to test performance under heavy load? v0.6.0 includes a new memory stress-test suite that supports 1,000+ URL workloads. Ideal for: +- Load testing +- Performance benchmarking +- Validating memory efficiency + +--- + +## Core Improvements +- Robots.txt compliance +- Proxy rotation support +- Improved URL normalization and session reuse +- Shared data across crawler hooks +- New page routing logic + +--- + +## Breaking Changes & Deprecations +- Legacy `crawl4ai/browser/*` modules are removed. Update imports accordingly. +- `AsyncPlaywrightCrawlerStrategy.get_page` now uses a new function signature. +- Deprecated markdown generator aliases now point to `DefaultMarkdownGenerator` with a warning. 
+ +--- + +## Miscellaneous Updates +- FastAPI validators replaced custom validation logic +- Docker build now based on a Chromium layer +- Repo-wide cleanup: ~36,000 insertions, ~5,000 deletions + +--- + +## New Examples Included +- Geo-location crawling +- Network + console log capture +- Docker MCP API usage +- Markdown selector usage +- Crypto project data extraction + +--- + +## Watch the Release Video +Want a visual walkthrough of all these updates? Watch the video: +🔗 https://youtu.be/9x7nVcjOZks + +If you're new to Crawl4AI, start here: +🔗 https://www.youtube.com/watch?v=xo3qK6Hg9AA&t=15s + +--- + +## Join the Community +We’ve just opened up our **Discord** for the public. Join us to: +- Ask questions +- Share your projects +- Get help or contribute + +💬 https://discord.gg/wpYFACrHR4 + +--- + +## Install or Upgrade ```bash -pip install -U crawl4ai==0.6.0 +pip install -U crawl4ai ``` --- -## Full changelog - -The diff between `main` and `next` spans **36 k insertions, 4.9 k deletions** over 121 files. Read the [compare view](https://github.com/unclecode/crawl4ai/compare/0.5.0.post8...0.6.0) or see `CHANGELOG.md` for the granular list. - ---- - -## Upgrade tips - -* Using the Docker API? Pull `unclecode/crawl4ai:0.6.0`, new args are documented in `/deploy/docker/README.md`. -* Stress‑test your stack with `tests/memory/run_benchmark.py` before production rollout. -* Markdown generators renamed but aliased, update when convenient, warnings will remind you. - ---- - -Happy crawling, ping `@unclecode` on X for questions or memes. +Live long and import crawl4ai. 🖖