feat(crawler): add network request and console message capturing

Implement comprehensive network request and console message capturing functionality:
- Add capture_network_requests and capture_console_messages config parameters
- Add network_requests and console_messages fields to models
- Implement Playwright event listeners to capture requests, responses, and console output
- Create detailed documentation and examples
- Add comprehensive tests

This feature enables deep visibility into web page activity for debugging,
security analysis, performance profiling, and API discovery in web applications.
This commit is contained in:
unclecode
2025-04-10 16:03:48 +08:00
parent a2061bf31e
commit 66ac07b4f3
31 changed files with 1686 additions and 10 deletions

View File

@@ -148,6 +148,8 @@ class CrawlResult(BaseModel):
ssl_certificate: Optional[SSLCertificate] = None
dispatch_result: Optional[DispatchResult] = None
redirected_url: Optional[str] = None
network_requests: Optional[List[Dict[str, Any]]] = None
console_messages: Optional[List[Dict[str, Any]]] = None
class Config:
arbitrary_types_allowed = True
@@ -315,6 +317,8 @@ class AsyncCrawlResponse(BaseModel):
downloaded_files: Optional[List[str]] = None
ssl_certificate: Optional[SSLCertificate] = None
redirected_url: Optional[str] = None
network_requests: Optional[List[Dict[str, Any]]] = None
console_messages: Optional[List[Dict[str, Any]]] = None
class Config:
arbitrary_types_allowed = True