feat(crawler): add network request and console message capturing

Implement comprehensive network request and console message capturing functionality: - Add capture_network_requests and capture_console_messages config parameters - Add network_requests and console_messages fields to models - Implement Playwright event listeners to capture requests, responses, and console output - Create detailed documentation and examples - Add comprehensive tests This feature enables deep visibility into web page activity for debugging, security analysis, performance profiling, and API discovery in web applications.
2025-04-10 16:03:48 +08:00
parent a2061bf31e
commit 66ac07b4f3
31 changed files with 1686 additions and 10 deletions
--- a/docs/examples/network_console_capture_example.py
+++ b/docs/examples/network_console_capture_example.py
@@ -0,0 +1,471 @@
+import asyncio
+import json
+import os
+import base64
+from pathlib import Path
+from typing import List, Dict, Any
+from datetime import datetime
+
+from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode, CrawlResult
+from crawl4ai import BrowserConfig
+
+# Directory that contains this example script; all demo output goes into a
+# "tmp" folder right next to it.
+__cur_dir__ = Path(__file__).parent
+
+# Create temp directory if it doesn't exist
+(__cur_dir__ / "tmp").mkdir(parents=True, exist_ok=True)
+
+async def demo_basic_network_capture():
+    """Crawl example.com with network capture on and summarize what was seen.
+
+    Prints the number of captured events, a tally per ``event_type``, and the
+    first request and first response found in ``result.network_requests``.
+    Only the banner is printed when the crawl fails or nothing was captured.
+    """
+    print("\n=== 1. Basic Network Request Capturing ===")
+
+    async with AsyncWebCrawler() as crawler:
+        run_config = CrawlerRunConfig(
+            capture_network_requests=True,
+            wait_until="networkidle",  # Wait for network to be idle
+        )
+        result = await crawler.arun(url="https://example.com/", config=run_config)
+
+        # Guard clause: nothing to report without a successful, non-empty capture
+        if not result.success or not result.network_requests:
+            return
+
+        events = result.network_requests
+        print(f"Captured {len(events)} network events")
+
+        # Single pass: count each event type and remember the first event of each
+        tally = {}
+        first_seen = {}
+        for event in events:
+            kind = event.get("event_type", "unknown")
+            tally[kind] = tally.get(kind, 0) + 1
+            first_seen.setdefault(kind, event)
+
+        print("Event types:")
+        for kind, count in tally.items():
+            print(f"  - {kind}: {count}")
+
+        # Show a sample request and response
+        sample_request = first_seen.get("request")
+        sample_response = first_seen.get("response")
+
+        if sample_request:
+            print("\nSample request:")
+            print(f"  URL: {sample_request.get('url')}")
+            print(f"  Method: {sample_request.get('method')}")
+            print(f"  Headers: {list(sample_request.get('headers', {}).keys())}")
+
+        if sample_response:
+            print("\nSample response:")
+            print(f"  URL: {sample_response.get('url')}")
+            print(f"  Status: {sample_response.get('status')} {sample_response.get('status_text', '')}")
+            print(f"  Headers: {list(sample_response.get('headers', {}).keys())}")
+
+
+async def demo_basic_console_capture():
+    """Write a small HTML page that logs to the console, crawl it, and list the messages.
+
+    The page is saved as ``tmp/console_test.html`` next to this script and is
+    loaded through a ``file://`` URL with ``capture_console_messages=True``.
+    Prints a tally per message type followed by every captured message.
+    """
+    print("\n=== 2. Basic Console Message Capturing ===")
+
+    # Create a simple HTML file with console messages
+    html_file = os.path.join(__cur_dir__, "tmp", "console_test.html")
+    with open(html_file, "w") as page:
+        page.write("""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>Console Test</title>
+        </head>
+        <body>
+            <h1>Console Message Test</h1>
+            <script>
+                console.log("This is a basic log message");
+                console.info("This is an info message");
+                console.warn("This is a warning message");
+                console.error("This is an error message");
+                
+                // Generate an error
+                try {
+                    nonExistentFunction();
+                } catch (e) {
+                    console.error("Caught error:", e);
+                }
+            </script>
+        </body>
+        </html>
+        """)
+
+    async with AsyncWebCrawler() as crawler:
+        run_config = CrawlerRunConfig(
+            capture_console_messages=True,
+            wait_until="networkidle",  # Wait to make sure all scripts execute
+        )
+        result = await crawler.arun(url=f"file://{html_file}", config=run_config)
+
+        # Guard clause: nothing to report without a successful, non-empty capture
+        if not result.success or not result.console_messages:
+            return
+
+        messages = result.console_messages
+        print(f"Captured {len(messages)} console messages")
+
+        # Count by message type
+        per_type = {}
+        for message in messages:
+            kind = message.get("type", "unknown")
+            per_type[kind] = per_type.get(kind, 0) + 1
+
+        print("Message types:")
+        for kind, count in per_type.items():
+            print(f"  - {kind}: {count}")
+
+        # Show all messages, numbered from 1
+        print("\nAll console messages:")
+        position = 0
+        for message in messages:
+            position += 1
+            print(f"  {position}. [{message.get('type', 'unknown')}] {message.get('text', '')}")
+
+
+async def demo_combined_capture():
+    """Capture network events and console messages in one crawl and dump them to JSON.
+
+    Crawls https://httpbin.org/html with both capture flags enabled, prints how
+    many items of each kind were collected, and writes the raw capture to
+    ``tmp/capture_data.json`` next to this script.
+    """
+    print("\n=== 3. Combined Network and Console Capture ===")
+
+    async with AsyncWebCrawler() as crawler:
+        run_config = CrawlerRunConfig(
+            capture_network_requests=True,
+            capture_console_messages=True,
+            wait_until="networkidle",
+        )
+        result = await crawler.arun(url="https://httpbin.org/html", config=run_config)
+
+        if not result.success:
+            return
+
+        # Either field may be empty/None, in which case the count is 0
+        network_count = 0
+        if result.network_requests:
+            network_count = len(result.network_requests)
+        console_count = 0
+        if result.console_messages:
+            console_count = len(result.console_messages)
+
+        print(f"Captured {network_count} network events and {console_count} console messages")
+
+        # Save the captured data to a JSON file for analysis
+        output_file = os.path.join(__cur_dir__, "tmp", "capture_data.json")
+        payload = {
+            "url": result.url,
+            "timestamp": datetime.now().isoformat(),
+            "network_requests": result.network_requests,
+            "console_messages": result.console_messages,
+        }
+        with open(output_file, "w") as out:
+            json.dump(payload, out, indent=2)
+
+        print(f"Full capture data saved to {output_file}")
+
+
+async def analyze_spa_network_traffic():
+    """Crawl weather.com and break its captured network traffic down.
+
+    Reports counts of requests/responses/failures, resource types, request URLs
+    containing "api", response status codes, failed requests and console
+    errors, then writes the statistics together with the raw capture to
+    ``tmp/weather_network_analysis.json`` next to this script.
+    """
+    print("\n=== 4. Analyzing SPA Network Traffic ===")
+
+    browser_config = BrowserConfig(
+        headless=True,
+        viewport_width=1280,
+        viewport_height=800,
+    )
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        run_config = CrawlerRunConfig(
+            capture_network_requests=True,
+            capture_console_messages=True,
+            # Wait longer to ensure all resources are loaded
+            wait_until="networkidle",
+            page_timeout=60000,  # 60 seconds
+        )
+        result = await crawler.arun(url="https://weather.com", config=run_config)
+
+        if not (result.success and result.network_requests):
+            return
+
+        # Bucket events by type; events of any other type are ignored
+        buckets = {"request": [], "response": [], "request_failed": []}
+        for event in result.network_requests:
+            bucket = buckets.get(event.get("event_type"))
+            if bucket is not None:
+                bucket.append(event)
+        requests = buckets["request"]
+        responses = buckets["response"]
+        failures = buckets["request_failed"]
+
+        print(f"Captured {len(requests)} requests, {len(responses)} responses, and {len(failures)} failures")
+
+        # Analyze request types
+        resource_types = {}
+        for request in requests:
+            kind = request.get("resource_type", "unknown")
+            resource_types[kind] = resource_types.get(kind, 0) + 1
+
+        print("\nResource types:")
+        # Most frequent first; the stable sort keeps ties in first-seen order
+        by_frequency = sorted(resource_types.items(), key=lambda pair: -pair[1])
+        for kind, count in by_frequency:
+            print(f"  - {kind}: {count}")
+
+        # Analyze API calls (any request whose URL contains "api", case-insensitively)
+        api_calls = [request for request in requests if "api" in request.get("url", "").lower()]
+        if api_calls:
+            print(f"\nDetected {len(api_calls)} API calls:")
+            for position, call in enumerate(api_calls[:5], 1):  # Show first 5
+                print(f"  {position}. {call.get('method')} {call.get('url')}")
+            hidden = len(api_calls) - 5
+            if hidden > 0:
+                print(f"     ... and {hidden} more")
+
+        # Analyze response status codes
+        status_codes = {}
+        for response in responses:
+            code = response.get("status", 0)
+            status_codes[code] = status_codes.get(code, 0) + 1
+
+        print("\nResponse status codes:")
+        for code, count in sorted(status_codes.items()):
+            print(f"  - {code}: {count}")
+
+        # Analyze failures
+        if failures:
+            print("\nFailed requests:")
+            for position, failure in enumerate(failures[:5], 1):  # Show first 5
+                print(f"  {position}. {failure.get('url')} - {failure.get('failure_text')}")
+            hidden = len(failures) - 5
+            if hidden > 0:
+                print(f"     ... and {hidden} more")
+
+        # Check for console errors (an absent/empty capture yields no errors)
+        console_errors = [
+            message
+            for message in (result.console_messages or [])
+            if message.get("type") == "error"
+        ]
+        if console_errors:
+            print(f"\nDetected {len(console_errors)} console errors:")
+            for position, error in enumerate(console_errors[:3], 1):  # Show first 3
+                print(f"  {position}. {error.get('text', '')[:100]}...")
+            hidden = len(console_errors) - 3
+            if hidden > 0:
+                print(f"     ... and {hidden} more")
+
+        # Save analysis to file
+        output_file = os.path.join(__cur_dir__, "tmp", "weather_network_analysis.json")
+        statistics = {
+            "request_count": len(requests),
+            "response_count": len(responses),
+            "failure_count": len(failures),
+            "resource_types": resource_types,
+            # Stringify the status codes used as keys before dumping
+            "status_codes": {str(code): count for code, count in status_codes.items()},
+            "api_call_count": len(api_calls),
+            "console_error_count": len(console_errors),
+        }
+        report = {
+            "url": result.url,
+            "timestamp": datetime.now().isoformat(),
+            "statistics": statistics,
+            "network_requests": result.network_requests,
+            "console_messages": result.console_messages,
+        }
+        with open(output_file, "w") as out:
+            json.dump(report, out, indent=2)
+
+        print(f"\nFull analysis saved to {output_file}")
+
+
+async def demo_security_analysis():
+    """Use captured network requests for a simple security review of a page.
+
+    Crawls https://www.nytimes.com/ with network and console capture enabled,
+    then:
+
+    * collects the distinct hosts (URL ``netloc``) that requests were sent to,
+    * splits them into first-party and third-party hosts,
+    * flags third-party hosts whose name contains a tracking-related keyword,
+    * lists requests made over plain ``http://``,
+    * writes the findings to ``tmp/security_analysis.json`` next to this script.
+
+    A host counts as first-party when it equals the crawled site's domain
+    (with a leading ``www.`` removed) or is a subdomain of it.
+    """
+    # Imported once at function scope so the name is bound even when the
+    # capture contains no "request" events.
+    from urllib.parse import urlparse
+
+    print("\n=== 5. Security Analysis with Network Capture ===")
+
+    async with AsyncWebCrawler() as crawler:
+        config = CrawlerRunConfig(
+            capture_network_requests=True,
+            capture_console_messages=True,
+            wait_until="networkidle"
+        )
+
+        # A site that makes multiple third-party requests
+        result = await crawler.arun(
+            url="https://www.nytimes.com/",
+            config=config
+        )
+
+        if result.success and result.network_requests:
+            print(f"Captured {len(result.network_requests)} network events")
+
+            # URLs of every outgoing request event
+            request_urls = [
+                req.get("url", "") for req in result.network_requests
+                if req.get("event_type") == "request"
+            ]
+
+            # Extract all domains
+            domains = set()
+            for url in request_urls:
+                try:
+                    domain = urlparse(url).netloc
+                except ValueError:
+                    # urlparse rejects some malformed URLs; skip those only
+                    continue
+                if domain:
+                    domains.add(domain)
+
+            print(f"\nDetected requests to {len(domains)} unique domains:")
+            main_domain = urlparse(result.url).netloc
+
+            # Compare against the site domain without a leading "www." so that
+            # sibling subdomains are recognized as first-party.
+            site_domain = main_domain.lower()
+            if site_domain.startswith("www."):
+                site_domain = site_domain[4:]
+
+            def is_first_party(domain):
+                # Exact match or a true subdomain. A plain substring test would
+                # also accept look-alike hosts such as "<site>.attacker.example".
+                host = domain.lower()
+                return host == site_domain or host.endswith("." + site_domain)
+
+            # Separate first-party vs third-party domains (sorted for stable output)
+            first_party = sorted(d for d in domains if is_first_party(d))
+            third_party = sorted(d for d in domains if not is_first_party(d))
+
+            print(f"  - First-party domains: {len(first_party)}")
+            print(f"  - Third-party domains: {len(third_party)}")
+
+            # Look for potential trackers/analytics
+            tracking_keywords = ["analytics", "tracker", "pixel", "tag", "stats", "metric", "collect", "beacon"]
+            potential_trackers = [
+                domain for domain in third_party
+                if any(keyword in domain.lower() for keyword in tracking_keywords)
+            ]
+
+            if potential_trackers:
+                print(f"\nPotential tracking/analytics domains ({len(potential_trackers)}):")
+                for i, domain in enumerate(sorted(potential_trackers)[:10], 1):
+                    print(f"  {i}. {domain}")
+                if len(potential_trackers) > 10:
+                    print(f"     ... and {len(potential_trackers) - 10} more")
+
+            # Check for insecure (HTTP) requests
+            insecure_requests = [url for url in request_urls if url.startswith("http://")]
+
+            if insecure_requests:
+                print(f"\nWarning: Found {len(insecure_requests)} insecure (HTTP) requests:")
+                for i, url in enumerate(insecure_requests[:5], 1):
+                    print(f"  {i}. {url}")
+                if len(insecure_requests) > 5:
+                    print(f"     ... and {len(insecure_requests) - 5} more")
+
+            # Save security analysis to file
+            output_file = os.path.join(__cur_dir__, "tmp", "security_analysis.json")
+            with open(output_file, "w") as f:
+                json.dump({
+                    "url": result.url,
+                    "main_domain": main_domain,
+                    "timestamp": datetime.now().isoformat(),
+                    "analysis": {
+                        "total_requests": len(request_urls),
+                        "unique_domains": len(domains),
+                        "first_party_domains": first_party,
+                        "third_party_domains": third_party,
+                        "potential_trackers": potential_trackers,
+                        "insecure_requests": insecure_requests
+                    }
+                }, f, indent=2)
+
+            print(f"\nFull security analysis saved to {output_file}")
+
+
+def _resource_type_from_url(url):
+    """Classify a URL as image/javascript/css/font/other by its path extension.
+
+    Only the path component is inspected, so a query string or fragment
+    (``app.js?v=3``) does not hide the extension.
+    """
+    from urllib.parse import urlparse
+
+    try:
+        path = urlparse(url).path
+    except ValueError:
+        # Malformed URL: no reliable extension to look at
+        return "other"
+
+    ext = os.path.splitext(path)[1].lstrip(".").lower()
+    if ext in ("jpg", "jpeg", "png", "gif", "webp", "svg", "ico"):
+        return "image"
+    if ext == "js":
+        return "javascript"
+    if ext == "css":
+        return "css"
+    if ext in ("woff", "woff2", "ttf", "otf", "eot"):
+        return "font"
+    return "other"
+
+
+async def demo_performance_analysis():
+    """Use captured response timing data for a rough performance breakdown.
+
+    Crawls https://www.cnn.com/ with network capture enabled, groups response
+    events that carry ``request_timing`` by resource type (derived from the URL
+    path extension), prints count/average/maximum duration per type and the
+    five slowest resources, and writes the details to
+    ``tmp/performance_analysis.json`` next to this script.
+    """
+    print("\n=== 6. Performance Analysis with Network Capture ===")
+
+    async with AsyncWebCrawler() as crawler:
+        config = CrawlerRunConfig(
+            capture_network_requests=True,
+            wait_until="networkidle",
+            page_timeout=60000  # 60 seconds
+        )
+
+        result = await crawler.arun(
+            url="https://www.cnn.com/",
+            config=config
+        )
+
+        if result.success and result.network_requests:
+            # Filter only response events with timing information
+            responses_with_timing = [
+                r for r in result.network_requests
+                if r.get("event_type") == "response" and r.get("request_timing")
+            ]
+
+            if responses_with_timing:
+                print(f"Analyzing timing for {len(responses_with_timing)} network responses")
+
+                # Group by resource type
+                resource_timings = {}
+                for resp in responses_with_timing:
+                    url = resp.get("url", "")
+                    timing = resp.get("request_timing", {})
+
+                    resource_type = _resource_type_from_url(url)
+                    bucket = resource_timings.setdefault(resource_type, [])
+
+                    # Calculate request duration if timing information is available.
+                    # NOTE(review): the key names and the seconds-to-milliseconds
+                    # conversion follow the documented sample payload; confirm them
+                    # against what the crawler actually stores in "request_timing".
+                    # Entries lacking either key are skipped.
+                    if isinstance(timing, dict) and "requestTime" in timing and "receiveHeadersEnd" in timing:
+                        # Convert to milliseconds
+                        duration = (timing["receiveHeadersEnd"] - timing["requestTime"]) * 1000
+                        bucket.append({
+                            "url": url,
+                            "duration_ms": duration
+                        })
+
+                # Calculate statistics for each resource type
+                print("\nPerformance by resource type:")
+                for resource_type, timings in resource_timings.items():
+                    if timings:
+                        durations = [t["duration_ms"] for t in timings]
+                        avg_duration = sum(durations) / len(durations)
+                        # max() returns the first entry holding the maximum duration
+                        slowest = max(timings, key=lambda t: t["duration_ms"])
+
+                        print(f"  {resource_type.upper()}:")
+                        print(f"    - Count: {len(timings)}")
+                        print(f"    - Avg time: {avg_duration:.2f} ms")
+                        print(f"    - Max time: {slowest['duration_ms']:.2f} ms")
+                        print(f"    - Slowest: {slowest['url']}")
+
+                # Identify the slowest resources overall
+                all_timings = []
+                for resource_type, timings in resource_timings.items():
+                    for timing in timings:
+                        # Tag each entry in place; the tag also appears in the
+                        # "resource_timings" section of the saved JSON.
+                        timing["type"] = resource_type
+                        all_timings.append(timing)
+
+                all_timings.sort(key=lambda x: x["duration_ms"], reverse=True)
+
+                print("\nTop 5 slowest resources:")
+                for i, timing in enumerate(all_timings[:5], 1):
+                    print(f"  {i}. [{timing['type']}] {timing['url']} - {timing['duration_ms']:.2f} ms")
+
+                # Save performance analysis to file
+                output_file = os.path.join(__cur_dir__, "tmp", "performance_analysis.json")
+                with open(output_file, "w") as f:
+                    json.dump({
+                        "url": result.url,
+                        "timestamp": datetime.now().isoformat(),
+                        "resource_timings": resource_timings,
+                        "slowest_resources": all_timings[:10]  # Save top 10
+                    }, f, indent=2)
+
+                print(f"\nFull performance analysis saved to {output_file}")
+
+
+async def main():
+    """Run every demo in order: the three basic ones, then the three advanced ones."""
+    print("=== Network and Console Capture Examples ===")
+
+    # Make sure tmp directory exists
+    tmp_dir = os.path.join(__cur_dir__, "tmp")
+    os.makedirs(tmp_dir, exist_ok=True)
+
+    demos = (
+        # Basic examples
+        demo_basic_network_capture,
+        demo_basic_console_capture,
+        demo_combined_capture,
+        # Advanced examples
+        analyze_spa_network_traffic,
+        demo_security_analysis,
+        demo_performance_analysis,
+    )
+    for demo in demos:
+        await demo()
+
+    print("\n=== Examples Complete ===")
+    print(f"Check the tmp directory for output files: {tmp_dir}")
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/docs/md_v2/advanced/network-console-capture.md
+++ b/docs/md_v2/advanced/network-console-capture.md
@@ -0,0 +1,205 @@
+# Network Requests & Console Message Capturing
+
+Crawl4AI can capture all network requests and browser console messages during a crawl, which is invaluable for debugging, security analysis, or understanding page behavior.
+
+## Configuration
+
+To enable network and console capturing, use these configuration options:
+
+```python
+from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
+
+# Enable both network request capture and console message capture
+config = CrawlerRunConfig(
+    capture_network_requests=True,  # Capture all network requests and responses
+    capture_console_messages=True   # Capture all browser console output
+)
+```
+
+## Example Usage
+
+```python
+import asyncio
+import json
+from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
+
+async def main():
+    """Crawl example.com with both captures enabled, summarize, and export to JSON."""
+    # Enable both network request capture and console message capture
+    config = CrawlerRunConfig(
+        capture_network_requests=True,
+        capture_console_messages=True,
+    )
+
+    async with AsyncWebCrawler() as crawler:
+        result = await crawler.arun(url="https://example.com", config=config)
+
+        if not result.success:
+            return
+
+        # Analyze network requests
+        network_events = result.network_requests
+        if network_events:
+            print(f"Captured {len(network_events)} network events")
+
+            # Count request types in a single pass
+            per_type = {}
+            for event in network_events:
+                kind = event.get("event_type")
+                per_type[kind] = per_type.get(kind, 0) + 1
+            request_count = per_type.get("request", 0)
+            response_count = per_type.get("response", 0)
+            failed_count = per_type.get("request_failed", 0)
+
+            print(f"Requests: {request_count}, Responses: {response_count}, Failed: {failed_count}")
+
+            # Find API calls
+            api_calls = [
+                event for event in network_events
+                if event.get("event_type") == "request" and "api" in event.get("url", "")
+            ]
+            if api_calls:
+                print(f"Detected {len(api_calls)} API calls:")
+                for call in api_calls[:3]:  # Show first 3
+                    print(f"  - {call.get('method')} {call.get('url')}")
+
+        # Analyze console messages
+        console_messages = result.console_messages
+        if console_messages:
+            print(f"Captured {len(console_messages)} console messages")
+
+            # Group by type, collecting errors along the way
+            message_types = {}
+            errors = []
+            for message in console_messages:
+                kind = message.get("type", "unknown")
+                message_types[kind] = message_types.get(kind, 0) + 1
+                if message.get("type") == "error":
+                    errors.append(message)
+
+            print("Message types:", message_types)
+
+            # Show errors (often the most important)
+            if errors:
+                print(f"Found {len(errors)} console errors:")
+                for err in errors[:2]:  # Show first 2
+                    print(f"  - {err.get('text', '')[:100]}")
+
+        # Export all captured data to a file for detailed analysis
+        export = {
+            "url": result.url,
+            "network_requests": result.network_requests or [],
+            "console_messages": result.console_messages or [],
+        }
+        with open("network_capture.json", "w") as f:
+            json.dump(export, f, indent=2)
+
+        print("Exported detailed capture data to network_capture.json")
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+## Captured Data Structure
+
+### Network Requests
+
+`result.network_requests` is a list of dictionaries, each representing one network event. Every event has these common fields:
+
+| Field | Description |
+|-------|-------------|
+| `event_type` | Type of event: `"request"`, `"response"`, or `"request_failed"` |
+| `url` | The URL of the request |
+| `timestamp` | Unix timestamp when the event was captured |
+
+#### Request Event Fields
+
+```json
+{
+  "event_type": "request",
+  "url": "https://example.com/api/data.json",
+  "method": "GET",
+  "headers": {"User-Agent": "...", "Accept": "..."},
+  "post_data": "key=value&otherkey=value",
+  "resource_type": "fetch",
+  "is_navigation_request": false,
+  "timestamp": 1633456789.123
+}
+```
+
+#### Response Event Fields
+
+```json
+{
+  "event_type": "response",
+  "url": "https://example.com/api/data.json",
+  "status": 200,
+  "status_text": "OK",
+  "headers": {"Content-Type": "application/json", "Cache-Control": "..."},
+  "from_service_worker": false,
+  "request_timing": {"requestTime": 1234.56, "receiveHeadersEnd": 1234.78},
+  "timestamp": 1633456789.456
+}
+```
+
+#### Failed Request Event Fields
+
+```json
+{
+  "event_type": "request_failed",
+  "url": "https://unreachable.example.com/image.png",
+  "method": "GET",
+  "resource_type": "image",
+  "failure_text": "net::ERR_NAME_NOT_RESOLVED",
+  "timestamp": 1633456789.789
+}
+```
+
+### Console Messages
+
+`result.console_messages` is a list of dictionaries, each representing one console message. Every message has these common fields:
+
+| Field | Description |
+|-------|-------------|
+| `type` | Message type: `"log"`, `"error"`, `"warning"`, `"info"`, etc. |
+| `text` | The message text |
+| `timestamp` | Unix timestamp when the message was captured |
+
+#### Console Message Example
+
+```json
+{
+  "type": "error",
+  "text": "Uncaught TypeError: Cannot read property 'length' of undefined",
+  "location": "https://example.com/script.js:123:45",
+  "timestamp": 1633456790.123
+}
+```
+
+## Key Benefits
+
+- **Full Request Visibility**: Capture all network activity including:
+  - Requests (URLs, methods, headers, post data)
+  - Responses (status codes, headers, timing)
+  - Failed requests (with error messages)
+  
+- **Console Message Access**: View all JavaScript console output:
+  - Log messages
+  - Warnings
+  - Errors with stack traces
+  - Developer debugging information
+
+- **Debugging Power**: Identify issues such as:
+  - Failed API calls or resource loading
+  - JavaScript errors affecting page functionality
+  - CORS or other security issues
+  - Hidden API endpoints and data flows
+
+- **Security Analysis**: Detect:
+  - Unexpected third-party requests
+  - Data leakage in request payloads
+  - Suspicious script behavior
+
+- **Performance Insights**: Analyze:
+  - Request timing data
+  - Resource loading patterns
+  - Potential bottlenecks
+
+## Use Cases
+
+1. **API Discovery**: Identify hidden endpoints and data flows in single-page applications
+2. **Debugging**: Track down JavaScript errors affecting page functionality
+3. **Security Auditing**: Detect unwanted third-party requests or data leakage
+4. **Performance Analysis**: Identify slow-loading resources
+5. **Ad/Tracker Analysis**: Detect and catalog advertising or tracking calls
+
+This capability is especially valuable for complex sites with heavy JavaScript, single-page applications, or when you need to understand the exact communication happening between a browser and servers.
--- a/docs/md_v2/api/crawl-result.md
+++ b/docs/md_v2/api/crawl-result.md
@@ -281,7 +281,69 @@ for result in results:

 ---

-## 7. Example: Accessing Everything
+## 7. Network Requests & Console Messages
+
+When you enable network and console message capturing in `CrawlerRunConfig` using `capture_network_requests=True` and `capture_console_messages=True`, the `CrawlResult` will include these fields:
+
+### 7.1 **`network_requests`** *(Optional[List[Dict[str, Any]]])*
+**What**: A list of dictionaries containing information about all network requests, responses, and failures captured during the crawl.
+**Structure**:
+- Each item has an `event_type` field that can be `"request"`, `"response"`, or `"request_failed"`.
+- Request events include `url`, `method`, `headers`, `post_data`, `resource_type`, and `is_navigation_request`.
+- Response events include `url`, `status`, `status_text`, `headers`, `from_service_worker`, and `request_timing`.
+- Failed request events include `url`, `method`, `resource_type`, and `failure_text`.
+- All events include a `timestamp` field.
+
+**Usage**:
+```python
+if result.network_requests:
+    # Count different types of events
+    requests = [r for r in result.network_requests if r.get("event_type") == "request"]
+    responses = [r for r in result.network_requests if r.get("event_type") == "response"]
+    failures = [r for r in result.network_requests if r.get("event_type") == "request_failed"]
+    
+    print(f"Captured {len(requests)} requests, {len(responses)} responses, and {len(failures)} failures")
+    
+    # Analyze API calls
+    api_calls = [r for r in requests if "api" in r.get("url", "")]
+    
+    # Identify failed resources
+    for failure in failures:
+        print(f"Failed to load: {failure.get('url')} - {failure.get('failure_text')}")
+```
+
+### 7.2 **`console_messages`** *(Optional[List[Dict[str, Any]]])*
+**What**: A list of dictionaries containing all browser console messages captured during the crawl.
+**Structure**:
+- Each item has a `type` field indicating the message type (e.g., `"log"`, `"error"`, `"warning"`, etc.).
+- The `text` field contains the actual message text.
+- Some messages include `location` information (URL, line, column).
+- All messages include a `timestamp` field.
+
+**Usage**:
+```python
+if result.console_messages:
+    # Count messages by type
+    message_types = {}
+    for msg in result.console_messages:
+        msg_type = msg.get("type", "unknown")
+        message_types[msg_type] = message_types.get(msg_type, 0) + 1
+    
+    print(f"Message type counts: {message_types}")
+    
+    # Display errors (which are usually most important)
+    for msg in result.console_messages:
+        if msg.get("type") == "error":
+            print(f"Error: {msg.get('text')}")
+```
+
+These fields provide deep visibility into the page's network activity and browser console, which is invaluable for debugging, security analysis, and understanding complex web applications.
+
+For more details on network and console capturing, see the [Network & Console Capture documentation](../advanced/network-console-capture.md).
+
+---
+
+## 8. Example: Accessing Everything

 ```python
 async def handle_result(result: CrawlResult):
@@ -321,11 +383,29 @@ async def handle_result(result: CrawlResult):
        print("PDF bytes length:", len(result.pdf))
    if result.mhtml:
        print("MHTML length:", len(result.mhtml))
+        
+    # Network and console capturing
+    if result.network_requests:
+        print(f"Network requests captured: {len(result.network_requests)}")
+        # Analyze request types
+        req_types = {}
+        for req in result.network_requests:
+            if "resource_type" in req:
+                req_types[req["resource_type"]] = req_types.get(req["resource_type"], 0) + 1
+        print(f"Resource types: {req_types}")
+        
+    if result.console_messages:
+        print(f"Console messages captured: {len(result.console_messages)}")
+        # Count by message type
+        msg_types = {}
+        for msg in result.console_messages:
+            msg_types[msg.get("type", "unknown")] = msg_types.get(msg.get("type", "unknown"), 0) + 1
+        print(f"Message types: {msg_types}")
 ```

 ---

-## 8. Key Points & Future
+## 9. Key Points & Future

 1. **Deprecated legacy properties of CrawlResult**  
   - `markdown_v2` - Deprecated in v0.5. Just use `markdown`. It holds the `MarkdownGenerationResult` now!