feat(crawler): add separate timeout for wait_for condition

Adds a new `wait_for_timeout` parameter to `CrawlerRunConfig` that sets a
timeout for the `wait_for` condition independently of `page_timeout`.
This gives finer-grained control over how long the crawler waits.

Also removes the unused colorama dependency and updates the LinkedIn crawler example.

BREAKING CHANGE: LinkedIn crawler example now uses different wait_for_images timing
This commit is contained in:
UncleCode
2025-05-16 17:00:45 +08:00
parent 897e017361
commit 8a5e23d374
4 changed files with 13 additions and 3 deletions

View File

@@ -235,6 +235,7 @@ async def crawl_people_page(
cache_mode=CacheMode.BYPASS,
magic=True,
wait_for=".org-people-profile-card__card-spacing",
wait_for_images=5000,
delay_before_return_html=1,
session_id="people_search",
)
@@ -420,8 +421,9 @@ def main():
cli_opts = parser.parse_args()
# decide on debug defaults
if cli_opts.debug:
if cli_opts.debug or True:
opts = detect_debug_defaults(force=True)
cli_opts = opts
else:
env_defaults = detect_debug_defaults()
opts = env_defaults if env_defaults else cli_opts