Commit Message:

Enhance Async Crawler with storage state handling
  - Updated Async Crawler to support storage state management.
  - Added error handling for URL validation in Async Web Crawler.
  - Modified README logo and improved .gitignore entries.
  - Fixed issues in multiple files for better code robustness.
This commit is contained in:
UncleCode
2024-12-09 20:04:59 +08:00
parent c51e901f68
commit 2d31915f0a
7 changed files with 58 additions and 15 deletions

View File

@@ -182,6 +182,10 @@ class AsyncWebCrawler:
Returns:
CrawlResult: The result of crawling and processing
"""
# Reject any url that is not a string or is an empty string
if not isinstance(url, str) or not url:
raise ValueError("Invalid URL, make sure the URL is a non-empty string")
async with self._lock or nullcontext():
try:
# Handle deprecated parameters
@@ -335,7 +339,8 @@ class AsyncWebCrawler:
# print(f"{Fore.RED}{self.tag_format('ERROR')} {self.log_icons['ERROR']} Failed to crawl {cache_context.display_url[:URL_LOG_SHORTEN_LENGTH]}... | {e.msg}{Style.RESET_ALL}")
self.logger.error_status(
url=cache_context.display_url,
# url=cache_context.display_url,
url=url,
error=create_box_message(e.msg, type = "error"),
tag="ERROR"
)