Merge branch 'next' into 2025-MAR-ALPHA-1

This commit is contained in:
Aravind Karnam
2025-04-17 10:50:02 +05:30
38 changed files with 5574 additions and 878 deletions

View File

@@ -357,8 +357,7 @@ async def demo_performance_analysis():
async with AsyncWebCrawler() as crawler:
config = CrawlerRunConfig(
capture_network_requests=True,
wait_until="networkidle",
page_timeout=60000 # 60 seconds
page_timeout=60 * 2 * 1000 # 120 seconds
)
result = await crawler.arun(
@@ -406,6 +405,13 @@ async def demo_performance_analysis():
"url": url,
"duration_ms": duration
})
if isinstance(timing, dict) and "requestStart" in timing and "responseStart" in timing and "startTime" in timing:
# Convert to milliseconds
duration = (timing["responseStart"] - timing["requestStart"]) * 1000
resource_timings[resource_type].append({
"url": url,
"duration_ms": duration
})
# Calculate statistics for each resource type
print("\nPerformance by resource type:")
@@ -455,14 +461,14 @@ async def main():
os.makedirs(os.path.join(__cur_dir__, "tmp"), exist_ok=True)
# Run basic examples
await demo_basic_network_capture()
# await demo_basic_network_capture()
await demo_basic_console_capture()
await demo_combined_capture()
# await demo_combined_capture()
# Run advanced examples
await analyze_spa_network_traffic()
await demo_security_analysis()
await demo_performance_analysis()
# await analyze_spa_network_traffic()
# await demo_security_analysis()
# await demo_performance_analysis()
print("\n=== Examples Complete ===")
print(f"Check the tmp directory for output files: {os.path.join(__cur_dir__, 'tmp')}")

View File

@@ -4,7 +4,7 @@ import json
import base64
from pathlib import Path
from typing import List
from crawl4ai.proxy_strategy import ProxyConfig
from crawl4ai import ProxyConfig
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode, CrawlResult
from crawl4ai import RoundRobinProxyStrategy

View File

@@ -13,7 +13,7 @@ from crawl4ai.deep_crawling import (
)
from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer
from crawl4ai.async_crawler_strategy import AsyncHTTPCrawlerStrategy
from crawl4ai.proxy_strategy import ProxyConfig
from crawl4ai import ProxyConfig
from crawl4ai import RoundRobinProxyStrategy
from crawl4ai.content_filter_strategy import LLMContentFilter
from crawl4ai import DefaultMarkdownGenerator