Apply Ruff Corrections

This commit is contained in:
UncleCode
2025-01-13 19:19:58 +08:00
parent c3370ec5da
commit 8ec12d7d68
84 changed files with 6861 additions and 5076 deletions

View File

@@ -4,7 +4,7 @@ from enum import Enum
class CacheMode(Enum):
"""
Defines the caching behavior for web crawling operations.
Modes:
- ENABLED: Normal caching behavior (read and write)
- DISABLED: No caching at all
@@ -12,6 +12,7 @@ class CacheMode(Enum):
- WRITE_ONLY: Only write to cache, don't read
- BYPASS: Bypass cache for this operation
"""
ENABLED = "enabled"
DISABLED = "disabled"
READ_ONLY = "read_only"
@@ -22,10 +23,10 @@ class CacheMode(Enum):
class CacheContext:
"""
Encapsulates cache-related decisions and URL handling.
This class centralizes all cache-related logic and URL type checking,
making the caching behavior more predictable and maintainable.
Attributes:
url (str): The URL being processed.
cache_mode (CacheMode): The cache mode for the current operation.
@@ -36,10 +37,11 @@ class CacheContext:
is_raw_html (bool): True if the URL is raw HTML, False otherwise.
_url_display (str): The display name for the URL (web, local file, or raw HTML).
"""
def __init__(self, url: str, cache_mode: CacheMode, always_bypass: bool = False):
"""
Initializes the CacheContext with the provided URL and cache mode.
Args:
url (str): The URL being processed.
cache_mode (CacheMode): The cache mode for the current operation.
@@ -48,42 +50,42 @@ class CacheContext:
self.url = url
self.cache_mode = cache_mode
self.always_bypass = always_bypass
self.is_cacheable = url.startswith(('http://', 'https://', 'file://'))
self.is_web_url = url.startswith(('http://', 'https://'))
self.is_cacheable = url.startswith(("http://", "https://", "file://"))
self.is_web_url = url.startswith(("http://", "https://"))
self.is_local_file = url.startswith("file://")
self.is_raw_html = url.startswith("raw:")
self._url_display = url if not self.is_raw_html else "Raw HTML"
def should_read(self) -> bool:
    """
    Report whether the cache may be read for this context.

    How it works:
        1. When always_bypass is set, or the URL is not cacheable,
           reading is refused and False is returned.
        2. Otherwise the answer is True only when cache_mode is
           ENABLED or READ_ONLY.

    Returns:
        bool: True if the cache should be read, False otherwise.
    """
    # Same short-circuit order as before: always_bypass first, then is_cacheable.
    blocked = self.always_bypass or not self.is_cacheable
    if not blocked:
        return self.cache_mode in (CacheMode.ENABLED, CacheMode.READ_ONLY)
    return False
def should_write(self) -> bool:
    """
    Report whether the cache may be written for this context.

    How it works:
        1. When always_bypass is set, or the URL is not cacheable,
           writing is refused and False is returned.
        2. Otherwise the answer is True only when cache_mode is
           ENABLED or WRITE_ONLY.

    Returns:
        bool: True if the cache should be written, False otherwise.
    """
    # Same short-circuit order as before: always_bypass first, then is_cacheable.
    blocked = self.always_bypass or not self.is_cacheable
    if not blocked:
        return self.cache_mode in (CacheMode.ENABLED, CacheMode.WRITE_ONLY)
    return False
@property
def display_url(self) -> str:
"""Returns the URL in display format."""
@@ -94,11 +96,11 @@ def _legacy_to_cache_mode(
disable_cache: bool = False,
bypass_cache: bool = False,
no_cache_read: bool = False,
no_cache_write: bool = False
no_cache_write: bool = False,
) -> CacheMode:
"""
Converts legacy cache parameters to the new CacheMode enum.
This is an internal function to help transition from the old boolean flags
to the new CacheMode system.
"""