feat(docs): update README for version 0.3.74 with new features and improvements

fix(version): update version number to 0.3.74
refactor(async_webcrawler): enhance logging and add domain-based request delay
This commit is contained in:
UncleCode
2024-11-17 21:09:26 +08:00
parent df63a40606
commit 152ac35bc2
5 changed files with 47 additions and 19 deletions

View File

@@ -605,7 +605,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
proxy={"server": self.proxy} if self.proxy else None,
java_script_enabled=True,
accept_downloads=self.accept_downloads,
- downloads_path=self.downloads_path if self.accept_downloads else None
+ # downloads_path=self.downloads_path if self.accept_downloads else None
)
await context.add_cookies([{"name": "cookiesEnabled", "value": "true", "url": url}])
await context.set_extra_http_headers(self.headers)
@@ -905,7 +905,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
)
return response
except Error as e:
- raise Error(f"[ERROR] 🚫 crawl(): Failed to crawl {url}: {str(e)}")
+ raise Error(f"async_crawler_strategy.py:_crawleb(): {str(e)}")
# finally:
# if not session_id:
# await page.close()