Compare commits
11 Commits
codex/find
...
codex/fix-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e840aea2b | ||
|
|
85ac6fa523 | ||
|
|
becc4624bb | ||
|
|
754ba731fa | ||
|
|
ac9981a1f5 | ||
|
|
83ef15fd47 | ||
|
|
a3cb938675 | ||
|
|
9b60988232 | ||
|
|
98e951f611 | ||
|
|
baca2df8df | ||
|
|
8a5e23d374 |
@@ -764,6 +764,9 @@ class CrawlerRunConfig():
|
||||
Default: 60000 (60 seconds).
|
||||
wait_for (str or None): A CSS selector or JS condition to wait for before extracting content.
|
||||
Default: None.
|
||||
wait_for_timeout (int or None): Specific timeout in ms for the wait_for condition.
|
||||
If None, uses page_timeout instead.
|
||||
Default: None.
|
||||
wait_for_images (bool): If True, wait for images to load before extracting content.
|
||||
Default: False.
|
||||
delay_before_return_html (float): Delay in seconds before retrieving final HTML.
|
||||
@@ -904,6 +907,7 @@ class CrawlerRunConfig():
|
||||
wait_until: str = "domcontentloaded",
|
||||
page_timeout: int = PAGE_TIMEOUT,
|
||||
wait_for: str = None,
|
||||
wait_for_timeout: int = None,
|
||||
wait_for_images: bool = False,
|
||||
delay_before_return_html: float = 0.1,
|
||||
mean_delay: float = 0.1,
|
||||
@@ -1000,6 +1004,7 @@ class CrawlerRunConfig():
|
||||
self.wait_until = wait_until
|
||||
self.page_timeout = page_timeout
|
||||
self.wait_for = wait_for
|
||||
self.wait_for_timeout = wait_for_timeout
|
||||
self.wait_for_images = wait_for_images
|
||||
self.delay_before_return_html = delay_before_return_html
|
||||
self.mean_delay = mean_delay
|
||||
@@ -1141,6 +1146,7 @@ class CrawlerRunConfig():
|
||||
wait_until=kwargs.get("wait_until", "domcontentloaded"),
|
||||
page_timeout=kwargs.get("page_timeout", 60000),
|
||||
wait_for=kwargs.get("wait_for"),
|
||||
wait_for_timeout=kwargs.get("wait_for_timeout"),
|
||||
wait_for_images=kwargs.get("wait_for_images", False),
|
||||
delay_before_return_html=kwargs.get("delay_before_return_html", 0.1),
|
||||
mean_delay=kwargs.get("mean_delay", 0.1),
|
||||
@@ -1250,6 +1256,7 @@ class CrawlerRunConfig():
|
||||
"wait_until": self.wait_until,
|
||||
"page_timeout": self.page_timeout,
|
||||
"wait_for": self.wait_for,
|
||||
"wait_for_timeout": self.wait_for_timeout,
|
||||
"wait_for_images": self.wait_for_images,
|
||||
"delay_before_return_html": self.delay_before_return_html,
|
||||
"mean_delay": self.mean_delay,
|
||||
|
||||
@@ -937,8 +937,10 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
|
||||
if config.wait_for:
|
||||
try:
|
||||
# Use wait_for_timeout if specified, otherwise fall back to page_timeout
|
||||
timeout = config.wait_for_timeout if config.wait_for_timeout is not None else config.page_timeout
|
||||
await self.smart_wait(
|
||||
page, config.wait_for, timeout=config.page_timeout
|
||||
page, config.wait_for, timeout=timeout
|
||||
)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Wait condition failed: {str(e)}")
|
||||
|
||||
@@ -964,7 +964,10 @@ class BrowserManager:
|
||||
pages = context.pages
|
||||
page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
|
||||
if not page:
|
||||
page = context.pages[0] # await context.new_page()
|
||||
if pages:
|
||||
page = context.pages[0]
|
||||
else:
|
||||
page = await context.new_page()
|
||||
else:
|
||||
# Otherwise, check if we have an existing context for this config
|
||||
config_signature = self._make_config_signature(crawlerRunConfig)
|
||||
|
||||
@@ -235,6 +235,7 @@ async def crawl_people_page(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
magic=True,
|
||||
wait_for=".org-people-profile-card__card-spacing",
|
||||
wait_for_images=5000,
|
||||
delay_before_return_html=1,
|
||||
session_id="people_search",
|
||||
)
|
||||
@@ -420,8 +421,9 @@ def main():
|
||||
cli_opts = parser.parse_args()
|
||||
|
||||
# decide on debug defaults
|
||||
if cli_opts.debug:
|
||||
if cli_opts.debug or True:
|
||||
opts = detect_debug_defaults(force=True)
|
||||
cli_opts = opts
|
||||
else:
|
||||
env_defaults = detect_debug_defaults()
|
||||
opts = env_defaults if env_defaults else cli_opts
|
||||
|
||||
37
docs/md_v2/assets/feedback-overrides.css
Normal file
37
docs/md_v2/assets/feedback-overrides.css
Normal file
@@ -0,0 +1,37 @@
|
||||
/* docs/md_v2/assets/feedback-overrides.css */
|
||||
:root {
|
||||
/* brand */
|
||||
--feedback-primary-color: #09b5a5;
|
||||
--feedback-highlight-color: #fed500; /* stars etc */
|
||||
|
||||
/* modal shell / text */
|
||||
--feedback-modal-content-bg-color: var(--background-color);
|
||||
--feedback-modal-content-text-color: var(--font-color);
|
||||
--feedback-modal-content-border-color: var(--primary-dimmed-color);
|
||||
--feedback-modal-content-border-radius: 4px;
|
||||
|
||||
/* overlay */
|
||||
--feedback-overlay-bg-color: rgba(0,0,0,.75);
|
||||
|
||||
/* rating buttons */
|
||||
--feedback-modal-rating-button-color: var(--secondary-color);
|
||||
--feedback-modal-rating-button-selected-color: var(--primary-color);
|
||||
|
||||
/* inputs */
|
||||
--feedback-modal-input-bg-color: var(--code-bg-color);
|
||||
--feedback-modal-input-text-color: var(--font-color);
|
||||
--feedback-modal-input-border-color: var(--primary-dimmed-color);
|
||||
--feedback-modal-input-border-color-focused: var(--primary-color);
|
||||
|
||||
/* submit / secondary buttons */
|
||||
--feedback-modal-button-submit-bg-color: var(--primary-color);
|
||||
--feedback-modal-button-submit-bg-color-hover: var(--primary-dimmed-color);
|
||||
--feedback-modal-button-submit-text-color: var(--invert-font-color);
|
||||
|
||||
--feedback-modal-button-bg-color: transparent; /* screenshot btn */
|
||||
--feedback-modal-button-border-color: var(--primary-color);
|
||||
--feedback-modal-button-icon-color: var(--primary-color);
|
||||
}
|
||||
|
||||
/* optional: keep the “Powered by” link subtle */
|
||||
.feedback-logo a{color:var(--secondary-color);}
|
||||
5
docs/md_v2/assets/gtag.js
Normal file
5
docs/md_v2/assets/gtag.js
Normal file
@@ -0,0 +1,5 @@
|
||||
window.dataLayer = window.dataLayer || [];
|
||||
function gtag(){dataLayer.push(arguments);}
|
||||
gtag('js', new Date());
|
||||
|
||||
gtag('config', 'G-58W0K2ZQ25');
|
||||
BIN
docs/md_v2/favicon.ico
Normal file
BIN
docs/md_v2/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.4 KiB |
BIN
docs/md_v2/img/favicon-32x32.png
Normal file
BIN
docs/md_v2/img/favicon-32x32.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.6 KiB |
BIN
docs/md_v2/img/favicon-x-32x32.png
Normal file
BIN
docs/md_v2/img/favicon-x-32x32.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.4 KiB |
BIN
docs/md_v2/img/favicon.ico
Normal file
BIN
docs/md_v2/img/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.4 KiB |
47
docs/md_v2/overrides/main.html
Normal file
47
docs/md_v2/overrides/main.html
Normal file
@@ -0,0 +1,47 @@
|
||||
{% set extra_html_attrs = 'data-theme="dark"' %}
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block extrahead %}
|
||||
{{ super() }}
|
||||
<script>
|
||||
document.documentElement.setAttribute("data-theme", "dark");
|
||||
</script>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pushfeedback/dist/pushfeedback/pushfeedback.css">
|
||||
|
||||
<style>
|
||||
:root {
|
||||
/* brand */
|
||||
--feedback-primary-color: #09b5a5;
|
||||
--feedback-highlight-color: #fed500;
|
||||
|
||||
|
||||
/* align with the value you really use in :root */
|
||||
--header-height: 65px;
|
||||
|
||||
/* Push modal content down */
|
||||
--feedback-modal-content-position-top: var(--header-height);
|
||||
|
||||
--feedback-modal-modal-wrapper-z-index: 1100;
|
||||
/* must be greater than the header’s z-index of 1000 */
|
||||
--feedback-modal-content-z-index: 1101;
|
||||
}
|
||||
|
||||
feedback-modal::part(overlay) {
|
||||
top: var(--header-height);
|
||||
/* start below header */
|
||||
height: calc(100vh - var(--header-height));
|
||||
/* fill the rest */
|
||||
|
||||
|
||||
}
|
||||
</style>
|
||||
<script type="module"
|
||||
src="https://cdn.jsdelivr.net/npm/pushfeedback@latest/dist/pushfeedback/pushfeedback.esm.js"></script>
|
||||
{% endblock %}
|
||||
|
||||
{% block footer %}
|
||||
<feedback-button project="w8plzp8vjp" button-style="dark" button-position="center-right" modal-position="sidebar-right">
|
||||
>
|
||||
Feedback
|
||||
</feedback-button>
|
||||
{% endblock %}
|
||||
@@ -1,4 +1,5 @@
|
||||
site_name: Crawl4AI Documentation (v0.6.x)
|
||||
site_favicon: docs/md_v2/favicon.ico
|
||||
site_description: 🚀🤖 Crawl4AI, Open-source LLM-Friendly Web Crawler & Scraper
|
||||
site_url: https://docs.crawl4ai.com
|
||||
repo_url: https://github.com/unclecode/crawl4ai
|
||||
@@ -57,6 +58,8 @@ nav:
|
||||
theme:
|
||||
name: 'terminal'
|
||||
palette: 'dark'
|
||||
custom_dir: docs/md_v2/overrides
|
||||
color_mode: 'dark'
|
||||
icon:
|
||||
repo: fontawesome/brands/github
|
||||
|
||||
@@ -82,8 +85,11 @@ extra_css:
|
||||
- assets/styles.css
|
||||
- assets/highlight.css
|
||||
- assets/dmvendor.css
|
||||
- assets/feedback-overrides.css
|
||||
|
||||
extra_javascript:
|
||||
- https://www.googletagmanager.com/gtag/js?id=G-58W0K2ZQ25
|
||||
- assets/gtag.js
|
||||
- assets/highlight.min.js
|
||||
- assets/highlight_init.js
|
||||
- https://buttons.github.io/buttons.js
|
||||
|
||||
@@ -26,7 +26,6 @@ dependencies = [
|
||||
"xxhash~=3.4",
|
||||
"rank-bm25~=0.2",
|
||||
"aiofiles>=24.1.0",
|
||||
"colorama~=0.4",
|
||||
"snowballstemmer~=2.2",
|
||||
"pydantic>=2.10",
|
||||
"pyOpenSSL>=24.3.0",
|
||||
|
||||
Reference in New Issue
Block a user