Enhance crawler strategies with new features

- Reimplemented JsonXPathExtractionStrategy for enhanced JSON data extraction.
- Updated existing extraction strategies for better performance.
- Improved handling of response status codes during crawls.
This commit is contained in:
UncleCode
2024-12-17 22:40:10 +08:00
parent 4a5f1aebee
commit 393bb911c0
4 changed files with 48 additions and 41 deletions

View File

@@ -795,9 +795,14 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
raise RuntimeError(f"Failed on navigating ACS-GOTO:\n{str(e)}")
await self.execute_hook('after_goto', page, context=context)
if response is None:
status_code = 200
response_headers = {}
else:
status_code = response.status
response_headers = response.headers
status_code = response.status
response_headers = response.headers
else:
status_code = 200
response_headers = {}