Merge branch 'next' into 2025-MAR-ALPHA-1

This commit is contained in:
Aravind Karnam
2025-04-17 10:50:02 +05:30
38 changed files with 5574 additions and 878 deletions

View File

@@ -2,7 +2,7 @@
import warnings
from .async_webcrawler import AsyncWebCrawler, CacheMode
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig, ProxyConfig
from .content_scraping_strategy import (
ContentScrapingStrategy,
@@ -121,6 +121,7 @@ __all__ = [
"Crawl4aiDockerClient",
"ProxyRotationStrategy",
"RoundRobinProxyStrategy",
"ProxyConfig"
]

View File

@@ -5,6 +5,7 @@ from .config import (
MIN_WORD_THRESHOLD,
IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
PROVIDER_MODELS,
PROVIDER_MODELS_PREFIXES,
SCREENSHOT_HEIGHT_TRESHOLD,
PAGE_TIMEOUT,
IMAGE_SCORE_THRESHOLD,
@@ -27,11 +28,8 @@ import inspect
from typing import Any, Dict, Optional
from enum import Enum
from .proxy_strategy import ProxyConfig
try:
from .browser.models import DockerConfig
except ImportError:
DockerConfig = None
# from .proxy_strategy import ProxyConfig
def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
@@ -122,23 +120,25 @@ def from_serializable_dict(data: Any) -> Any:
# Handle typed data
if isinstance(data, dict) and "type" in data:
# Handle plain dictionaries
if data["type"] == "dict":
if data["type"] == "dict" and "value" in data:
return {k: from_serializable_dict(v) for k, v in data["value"].items()}
# Import from crawl4ai for class instances
import crawl4ai
cls = getattr(crawl4ai, data["type"])
if hasattr(crawl4ai, data["type"]):
cls = getattr(crawl4ai, data["type"])
# Handle Enum
if issubclass(cls, Enum):
return cls(data["params"])
# Handle Enum
if issubclass(cls, Enum):
return cls(data["params"])
# Handle class instances
constructor_args = {
k: from_serializable_dict(v) for k, v in data["params"].items()
}
return cls(**constructor_args)
if "params" in data:
# Handle class instances
constructor_args = {
k: from_serializable_dict(v) for k, v in data["params"].items()
}
return cls(**constructor_args)
# Handle lists
if isinstance(data, list):
@@ -159,6 +159,117 @@ def is_empty_value(value: Any) -> bool:
return True
return False
class ProxyConfig:
def __init__(
self,
server: str,
username: Optional[str] = None,
password: Optional[str] = None,
ip: Optional[str] = None,
):
"""Configuration class for a single proxy.
Args:
server: Proxy server URL (e.g., "http://127.0.0.1:8080")
username: Optional username for proxy authentication
password: Optional password for proxy authentication
ip: Optional IP address for verification purposes
"""
self.server = server
self.username = username
self.password = password
# Extract IP from server if not explicitly provided
self.ip = ip or self._extract_ip_from_server()
def _extract_ip_from_server(self) -> Optional[str]:
"""Extract IP address from server URL."""
try:
# Simple extraction assuming http://ip:port format
if "://" in self.server:
parts = self.server.split("://")[1].split(":")
return parts[0]
else:
parts = self.server.split(":")
return parts[0]
except Exception:
return None
@staticmethod
def from_string(proxy_str: str) -> "ProxyConfig":
"""Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
parts = proxy_str.split(":")
if len(parts) == 4: # ip:port:username:password
ip, port, username, password = parts
return ProxyConfig(
server=f"http://{ip}:{port}",
username=username,
password=password,
ip=ip
)
elif len(parts) == 2: # ip:port only
ip, port = parts
return ProxyConfig(
server=f"http://{ip}:{port}",
ip=ip
)
else:
raise ValueError(f"Invalid proxy string format: {proxy_str}")
@staticmethod
def from_dict(proxy_dict: Dict) -> "ProxyConfig":
"""Create a ProxyConfig from a dictionary."""
return ProxyConfig(
server=proxy_dict.get("server"),
username=proxy_dict.get("username"),
password=proxy_dict.get("password"),
ip=proxy_dict.get("ip")
)
@staticmethod
def from_env(env_var: str = "PROXIES") -> List["ProxyConfig"]:
"""Load proxies from environment variable.
Args:
env_var: Name of environment variable containing comma-separated proxy strings
Returns:
List of ProxyConfig objects
"""
proxies = []
try:
proxy_list = os.getenv(env_var, "").split(",")
for proxy in proxy_list:
if not proxy:
continue
proxies.append(ProxyConfig.from_string(proxy))
except Exception as e:
print(f"Error loading proxies from environment: {e}")
return proxies
def to_dict(self) -> Dict:
"""Convert to dictionary representation."""
return {
"server": self.server,
"username": self.username,
"password": self.password,
"ip": self.ip
}
def clone(self, **kwargs) -> "ProxyConfig":
"""Create a copy of this configuration with updated values.
Args:
**kwargs: Key-value pairs of configuration options to update
Returns:
ProxyConfig: A new instance with the specified updates
"""
config_dict = self.to_dict()
config_dict.update(kwargs)
return ProxyConfig.from_dict(config_dict)
class BrowserConfig:
"""
@@ -195,8 +306,6 @@ class BrowserConfig:
Default: None.
proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
If None, no additional proxy config. Default: None.
docker_config (DockerConfig or dict or None): Configuration for Docker-based browser automation.
Contains settings for Docker container operation. Default: None.
viewport_width (int): Default viewport width for pages. Default: 1080.
viewport_height (int): Default viewport height for pages. Default: 600.
viewport (dict): Default viewport dimensions for pages. If set, overrides viewport_width and viewport_height.
@@ -242,7 +351,6 @@ class BrowserConfig:
channel: str = "chromium",
proxy: str = None,
proxy_config: Union[ProxyConfig, dict, None] = None,
docker_config: Union[DockerConfig, dict, None] = None,
viewport_width: int = 1080,
viewport_height: int = 600,
viewport: dict = None,
@@ -283,15 +391,7 @@ class BrowserConfig:
self.chrome_channel = ""
self.proxy = proxy
self.proxy_config = proxy_config
# Handle docker configuration
if isinstance(docker_config, dict) and DockerConfig is not None:
self.docker_config = DockerConfig.from_kwargs(docker_config)
else:
self.docker_config = docker_config
if self.docker_config:
self.user_data_dir = self.docker_config.user_data_dir
self.viewport_width = viewport_width
self.viewport_height = viewport_height
@@ -362,7 +462,6 @@ class BrowserConfig:
channel=kwargs.get("channel", "chromium"),
proxy=kwargs.get("proxy"),
proxy_config=kwargs.get("proxy_config", None),
docker_config=kwargs.get("docker_config", None),
viewport_width=kwargs.get("viewport_width", 1080),
viewport_height=kwargs.get("viewport_height", 600),
accept_downloads=kwargs.get("accept_downloads", False),
@@ -419,13 +518,7 @@ class BrowserConfig:
"debugging_port": self.debugging_port,
"host": self.host,
}
# Include docker_config if it exists
if hasattr(self, "docker_config") and self.docker_config is not None:
if hasattr(self.docker_config, "to_dict"):
result["docker_config"] = self.docker_config.to_dict()
else:
result["docker_config"] = self.docker_config
return result
@@ -1178,9 +1271,18 @@ class LLMConfig:
elif api_token and api_token.startswith("env:"):
self.api_token = os.getenv(api_token[4:])
else:
self.api_token = PROVIDER_MODELS.get(provider, "no-token") or os.getenv(
DEFAULT_PROVIDER_API_KEY
)
# Check if given provider starts with any of key in PROVIDER_MODELS_PREFIXES
# If not, check if it is in PROVIDER_MODELS
prefixes = PROVIDER_MODELS_PREFIXES.keys()
if any(provider.startswith(prefix) for prefix in prefixes):
selected_prefix = next(
(prefix for prefix in prefixes if provider.startswith(prefix)),
None,
)
self.api_token = PROVIDER_MODELS_PREFIXES.get(selected_prefix)
else:
self.provider = DEFAULT_PROVIDER
self.api_token = os.getenv(DEFAULT_PROVIDER_API_KEY)
self.base_url = base_url
self.temprature = temprature
self.max_tokens = max_tokens

View File

@@ -36,7 +36,7 @@ from .markdown_generation_strategy import (
)
from .deep_crawling import DeepCrawlDecorator
from .async_logger import AsyncLogger, AsyncLoggerBase
from .async_configs import BrowserConfig, CrawlerRunConfig
from .async_configs import BrowserConfig, CrawlerRunConfig, ProxyConfig
from .async_dispatcher import * # noqa: F403
from .async_dispatcher import BaseDispatcher, MemoryAdaptiveDispatcher, RateLimiter
@@ -291,12 +291,12 @@ class AsyncWebCrawler:
# Update proxy configuration from rotation strategy if available
if config and config.proxy_rotation_strategy:
next_proxy = await config.proxy_rotation_strategy.get_next_proxy()
next_proxy : ProxyConfig = await config.proxy_rotation_strategy.get_next_proxy()
if next_proxy:
self.logger.info(
message="Switch proxy: {proxy}",
tag="PROXY",
params={"proxy": next_proxy.server},
params={"proxy": next_proxy.server}
)
config.proxy_config = next_proxy
# config = config.clone(proxy_config=next_proxy)

View File

@@ -94,6 +94,7 @@ class ManagedBrowser:
host: str = "localhost",
debugging_port: int = 9222,
cdp_url: Optional[str] = None,
browser_config: Optional[BrowserConfig] = None,
):
"""
Initialize the ManagedBrowser instance.
@@ -109,17 +110,19 @@ class ManagedBrowser:
host (str): Host for debugging the browser. Default: "localhost".
debugging_port (int): Port for debugging the browser. Default: 9222.
cdp_url (str or None): CDP URL to connect to the browser. Default: None.
browser_config (BrowserConfig): Configuration object containing all browser settings. Default: None.
"""
self.browser_type = browser_type
self.user_data_dir = user_data_dir
self.headless = headless
self.browser_type = browser_config.browser_type
self.user_data_dir = browser_config.user_data_dir
self.headless = browser_config.headless
self.browser_process = None
self.temp_dir = None
self.debugging_port = debugging_port
self.host = host
self.debugging_port = browser_config.debugging_port
self.host = browser_config.host
self.logger = logger
self.shutting_down = False
self.cdp_url = cdp_url
self.cdp_url = browser_config.cdp_url
self.browser_config = browser_config
async def start(self) -> str:
"""
@@ -142,6 +145,9 @@ class ManagedBrowser:
# Get browser path and args based on OS and browser type
# browser_path = self._get_browser_path()
args = await self._get_browser_args()
if self.browser_config.extra_args:
args.extend(self.browser_config.extra_args)
# Start browser process
try:
@@ -477,6 +483,7 @@ class BrowserManager:
logger=self.logger,
debugging_port=self.config.debugging_port,
cdp_url=self.config.cdp_url,
browser_config=self.config,
)
async def start(self):
@@ -491,10 +498,12 @@ class BrowserManager:
Note: This method should be called in a separate task to avoid blocking the main event loop.
"""
if self.playwright is None:
from playwright.async_api import async_playwright
if self.playwright is not None:
await self.close()
from playwright.async_api import async_playwright
self.playwright = await async_playwright().start()
self.playwright = await async_playwright().start()
if self.config.cdp_url or self.config.use_managed_browser:
self.config.use_managed_browser = True

View File

@@ -29,6 +29,14 @@ PROVIDER_MODELS = {
'gemini/gemini-2.0-flash-lite-preview-02-05': os.getenv("GEMINI_API_KEY"),
"deepseek/deepseek-chat": os.getenv("DEEPSEEK_API_KEY"),
}
PROVIDER_MODELS_PREFIXES = {
"ollama": "no-token-needed", # Any model from Ollama no need for API token
"groq": os.getenv("GROQ_API_KEY"),
"openai": os.getenv("OPENAI_API_KEY"),
"anthropic": os.getenv("ANTHROPIC_API_KEY"),
"gemini": os.getenv("GEMINI_API_KEY"),
"deepseek": os.getenv("DEEPSEEK_API_KEY"),
}
# Chunk token threshold
CHUNK_TOKEN_THRESHOLD = 2**11 # 2048 tokens

View File

@@ -7,7 +7,9 @@ import time
from .prompts import PROMPT_EXTRACT_BLOCKS, PROMPT_EXTRACT_BLOCKS_WITH_INSTRUCTION, PROMPT_EXTRACT_SCHEMA_WITH_INSTRUCTION, JSON_SCHEMA_BUILDER_XPATH, PROMPT_EXTRACT_INFERRED_SCHEMA
from .config import (
DEFAULT_PROVIDER, CHUNK_TOKEN_THRESHOLD,
DEFAULT_PROVIDER,
DEFAULT_PROVIDER_API_KEY,
CHUNK_TOKEN_THRESHOLD,
OVERLAP_RATE,
WORD_TOKEN_RATE,
)
@@ -542,6 +544,11 @@ class LLMExtractionStrategy(ExtractionStrategy):
"""
super().__init__( input_format=input_format, **kwargs)
self.llm_config = llm_config
if not self.llm_config:
self.llm_config = create_llm_config(
provider=DEFAULT_PROVIDER,
api_token=os.environ.get(DEFAULT_PROVIDER_API_KEY),
)
self.instruction = instruction
self.extract_type = extraction_type
self.schema = schema

View File

@@ -40,10 +40,25 @@ def setup_home_directory():
f.write("")
def post_install():
"""Run all post-installation tasks"""
"""
Run all post-installation tasks.
Checks CRAWL4AI_MODE environment variable. If set to 'api',
skips Playwright browser installation.
"""
logger.info("Running post-installation setup...", tag="INIT")
setup_home_directory()
install_playwright()
# Check environment variable to conditionally skip Playwright install
run_mode = os.getenv('CRAWL4AI_MODE')
if run_mode == 'api':
logger.warning(
"CRAWL4AI_MODE=api detected. Skipping Playwright browser installation.",
tag="SETUP"
)
else:
# Proceed with installation only if mode is not 'api'
install_playwright()
run_migration()
# TODO: Will be added in the future
# setup_builtin_browser()

View File

@@ -4,6 +4,9 @@ from itertools import cycle
import os
########### ATTENTION PEOPLE OF EARTH ###########
# I have moved this config to async_configs.py, kept it here, in case someone still importing it, however
# be a dear and follow `from crawl4ai import ProxyConfig` instead :)
class ProxyConfig:
def __init__(
self,
@@ -119,12 +122,12 @@ class ProxyRotationStrategy(ABC):
"""Base abstract class for proxy rotation strategies"""
@abstractmethod
async def get_next_proxy(self) -> Optional[Dict]:
async def get_next_proxy(self) -> Optional[ProxyConfig]:
"""Get next proxy configuration from the strategy"""
pass
@abstractmethod
def add_proxies(self, proxies: List[Dict]):
def add_proxies(self, proxies: List[ProxyConfig]):
"""Add proxy configurations to the strategy"""
pass

View File

@@ -9,83 +9,44 @@ from urllib.parse import urlparse
import OpenSSL.crypto
from pathlib import Path
class SSLCertificate:
# === Inherit from dict ===
class SSLCertificate(dict):
"""
A class representing an SSL certificate with methods to export in various formats.
A class representing an SSL certificate, behaving like a dictionary
for direct JSON serialization. It stores the certificate information internally
and provides methods for export and property access.
Attributes:
cert_info (Dict[str, Any]): The certificate information.
Methods:
from_url(url: str, timeout: int = 10) -> Optional['SSLCertificate']: Create SSLCertificate instance from a URL.
from_file(file_path: str) -> Optional['SSLCertificate']: Create SSLCertificate instance from a file.
from_binary(binary_data: bytes) -> Optional['SSLCertificate']: Create SSLCertificate instance from binary data.
export_as_pem() -> str: Export the certificate as PEM format.
export_as_der() -> bytes: Export the certificate as DER format.
export_as_json() -> Dict[str, Any]: Export the certificate as JSON format.
export_as_text() -> str: Export the certificate as text format.
Inherits from dict, so instances are directly JSON serializable.
"""
# Use __slots__ for potential memory optimization if desired, though less common when inheriting dict
# __slots__ = ("_cert_info",) # If using slots, be careful with dict inheritance interaction
def __init__(self, cert_info: Dict[str, Any]):
self._cert_info = self._decode_cert_data(cert_info)
@staticmethod
def from_url(url: str, timeout: int = 10) -> Optional["SSLCertificate"]:
"""
Create SSLCertificate instance from a URL.
Initializes the SSLCertificate object.
Args:
url (str): URL of the website.
timeout (int): Timeout for the connection (default: 10).
Returns:
Optional[SSLCertificate]: SSLCertificate instance if successful, None otherwise.
cert_info (Dict[str, Any]): The raw certificate dictionary.
"""
try:
hostname = urlparse(url).netloc
if ":" in hostname:
hostname = hostname.split(":")[0]
# 1. Decode the data (handle bytes -> str)
decoded_info = self._decode_cert_data(cert_info)
context = ssl.create_default_context()
with socket.create_connection((hostname, 443), timeout=timeout) as sock:
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
cert_binary = ssock.getpeercert(binary_form=True)
x509 = OpenSSL.crypto.load_certificate(
OpenSSL.crypto.FILETYPE_ASN1, cert_binary
)
# 2. Store the decoded info internally (optional but good practice)
# self._cert_info = decoded_info # You can keep this if methods rely on it
cert_info = {
"subject": dict(x509.get_subject().get_components()),
"issuer": dict(x509.get_issuer().get_components()),
"version": x509.get_version(),
"serial_number": hex(x509.get_serial_number()),
"not_before": x509.get_notBefore(),
"not_after": x509.get_notAfter(),
"fingerprint": x509.digest("sha256").hex(),
"signature_algorithm": x509.get_signature_algorithm(),
"raw_cert": base64.b64encode(cert_binary),
}
# Add extensions
extensions = []
for i in range(x509.get_extension_count()):
ext = x509.get_extension(i)
extensions.append(
{"name": ext.get_short_name(), "value": str(ext)}
)
cert_info["extensions"] = extensions
return SSLCertificate(cert_info)
except Exception:
return None
# 3. Initialize the dictionary part of the object with the decoded data
super().__init__(decoded_info)
@staticmethod
def _decode_cert_data(data: Any) -> Any:
"""Helper method to decode bytes in certificate data."""
if isinstance(data, bytes):
return data.decode("utf-8")
try:
# Try UTF-8 first, fallback to latin-1 for arbitrary bytes
return data.decode("utf-8")
except UnicodeDecodeError:
return data.decode("latin-1") # Or handle as needed, maybe hex representation
elif isinstance(data, dict):
return {
(
@@ -97,36 +58,119 @@ class SSLCertificate:
return [SSLCertificate._decode_cert_data(item) for item in data]
return data
@staticmethod
def from_url(url: str, timeout: int = 10) -> Optional["SSLCertificate"]:
"""
Create SSLCertificate instance from a URL. Fetches cert info and initializes.
(Fetching logic remains the same)
"""
cert_info_raw = None # Variable to hold the fetched dict
try:
hostname = urlparse(url).netloc
if ":" in hostname:
hostname = hostname.split(":")[0]
context = ssl.create_default_context()
# Set check_hostname to False and verify_mode to CERT_NONE temporarily
# for potentially problematic certificates during fetch, but parse the result regardless.
# context.check_hostname = False
# context.verify_mode = ssl.CERT_NONE
with socket.create_connection((hostname, 443), timeout=timeout) as sock:
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
cert_binary = ssock.getpeercert(binary_form=True)
if not cert_binary:
print(f"Warning: No certificate returned for {hostname}")
return None
x509 = OpenSSL.crypto.load_certificate(
OpenSSL.crypto.FILETYPE_ASN1, cert_binary
)
# Create the dictionary directly
cert_info_raw = {
"subject": dict(x509.get_subject().get_components()),
"issuer": dict(x509.get_issuer().get_components()),
"version": x509.get_version(),
"serial_number": hex(x509.get_serial_number()),
"not_before": x509.get_notBefore(), # Keep as bytes initially, _decode handles it
"not_after": x509.get_notAfter(), # Keep as bytes initially
"fingerprint": x509.digest("sha256").hex(), # hex() is already string
"signature_algorithm": x509.get_signature_algorithm(), # Keep as bytes
"raw_cert": base64.b64encode(cert_binary), # Base64 is bytes, _decode handles it
}
# Add extensions
extensions = []
for i in range(x509.get_extension_count()):
ext = x509.get_extension(i)
# get_short_name() returns bytes, str(ext) handles value conversion
extensions.append(
{"name": ext.get_short_name(), "value": str(ext)}
)
cert_info_raw["extensions"] = extensions
except ssl.SSLCertVerificationError as e:
print(f"SSL Verification Error for {url}: {e}")
# Decide if you want to proceed or return None based on your needs
# You might try fetching without verification here if needed, but be cautious.
return None
except socket.gaierror:
print(f"Could not resolve hostname: {hostname}")
return None
except socket.timeout:
print(f"Connection timed out for {url}")
return None
except Exception as e:
print(f"Error fetching/processing certificate for {url}: {e}")
# Log the full error details if needed: logging.exception("Cert fetch error")
return None
# If successful, create the SSLCertificate instance from the dictionary
if cert_info_raw:
return SSLCertificate(cert_info_raw)
else:
return None
# --- Properties now access the dictionary items directly via self[] ---
@property
def issuer(self) -> Dict[str, str]:
return self.get("issuer", {}) # Use self.get for safety
@property
def subject(self) -> Dict[str, str]:
return self.get("subject", {})
@property
def valid_from(self) -> str:
return self.get("not_before", "")
@property
def valid_until(self) -> str:
return self.get("not_after", "")
@property
def fingerprint(self) -> str:
return self.get("fingerprint", "")
# --- Export methods can use `self` directly as it is the dict ---
def to_json(self, filepath: Optional[str] = None) -> Optional[str]:
"""
Export certificate as JSON.
Args:
filepath (Optional[str]): Path to save the JSON file (default: None).
Returns:
Optional[str]: JSON string if successful, None otherwise.
"""
json_str = json.dumps(self._cert_info, indent=2, ensure_ascii=False)
"""Export certificate as JSON."""
# `self` is already the dictionary we want to serialize
json_str = json.dumps(self, indent=2, ensure_ascii=False)
if filepath:
Path(filepath).write_text(json_str, encoding="utf-8")
return None
return json_str
def to_pem(self, filepath: Optional[str] = None) -> Optional[str]:
"""
Export certificate as PEM.
Args:
filepath (Optional[str]): Path to save the PEM file (default: None).
Returns:
Optional[str]: PEM string if successful, None otherwise.
"""
"""Export certificate as PEM."""
try:
# Decode the raw_cert (which should be string due to _decode)
raw_cert_bytes = base64.b64decode(self.get("raw_cert", ""))
x509 = OpenSSL.crypto.load_certificate(
OpenSSL.crypto.FILETYPE_ASN1,
base64.b64decode(self._cert_info["raw_cert"]),
OpenSSL.crypto.FILETYPE_ASN1, raw_cert_bytes
)
pem_data = OpenSSL.crypto.dump_certificate(
OpenSSL.crypto.FILETYPE_PEM, x509
@@ -136,49 +180,25 @@ class SSLCertificate:
Path(filepath).write_text(pem_data, encoding="utf-8")
return None
return pem_data
except Exception:
return None
except Exception as e:
print(f"Error converting to PEM: {e}")
return None
def to_der(self, filepath: Optional[str] = None) -> Optional[bytes]:
"""
Export certificate as DER.
Args:
filepath (Optional[str]): Path to save the DER file (default: None).
Returns:
Optional[bytes]: DER bytes if successful, None otherwise.
"""
"""Export certificate as DER."""
try:
der_data = base64.b64decode(self._cert_info["raw_cert"])
# Decode the raw_cert (which should be string due to _decode)
der_data = base64.b64decode(self.get("raw_cert", ""))
if filepath:
Path(filepath).write_bytes(der_data)
return None
return der_data
except Exception:
return None
except Exception as e:
print(f"Error converting to DER: {e}")
return None
@property
def issuer(self) -> Dict[str, str]:
"""Get certificate issuer information."""
return self._cert_info.get("issuer", {})
@property
def subject(self) -> Dict[str, str]:
"""Get certificate subject information."""
return self._cert_info.get("subject", {})
@property
def valid_from(self) -> str:
"""Get certificate validity start date."""
return self._cert_info.get("not_before", "")
@property
def valid_until(self) -> str:
"""Get certificate validity end date."""
return self._cert_info.get("not_after", "")
@property
def fingerprint(self) -> str:
"""Get certificate fingerprint."""
return self._cert_info.get("fingerprint", "")
# Optional: Add __repr__ for better debugging
def __repr__(self) -> str:
subject_cn = self.subject.get('CN', 'N/A')
issuer_cn = self.issuer.get('CN', 'N/A')
return f"<SSLCertificate Subject='{subject_cn}' Issuer='{issuer_cn}'>"