Merge branch 'pr-971' into merge-pr971
This commit is contained in:
@@ -171,7 +171,10 @@ class AsyncDatabaseManager:
|
|||||||
f"Code context:\n{error_context['code_context']}"
|
f"Code context:\n{error_context['code_context']}"
|
||||||
)
|
)
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
message=create_box_message(error_message, type="error"),
|
message="{error}",
|
||||||
|
tag="ERROR",
|
||||||
|
params={"error": str(error_message)},
|
||||||
|
boxes=["error"],
|
||||||
)
|
)
|
||||||
|
|
||||||
raise
|
raise
|
||||||
@@ -189,7 +192,10 @@ class AsyncDatabaseManager:
|
|||||||
f"Code context:\n{error_context['code_context']}"
|
f"Code context:\n{error_context['code_context']}"
|
||||||
)
|
)
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
message=create_box_message(error_message, type="error"),
|
message="{error}",
|
||||||
|
tag="ERROR",
|
||||||
|
params={"error": str(error_message)},
|
||||||
|
boxes=["error"],
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Optional, Dict, Any
|
from typing import Optional, Dict, Any, List
|
||||||
from colorama import Fore, Style, init
|
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.text import Text
|
||||||
|
from .utils import create_box_message
|
||||||
|
|
||||||
|
|
||||||
class LogLevel(Enum):
|
class LogLevel(Enum):
|
||||||
@@ -21,6 +23,26 @@ class LogLevel(Enum):
|
|||||||
FATAL = 10
|
FATAL = 10
|
||||||
|
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.name.lower()
|
||||||
|
|
||||||
|
class LogColor(str, Enum):
|
||||||
|
"""Enum for log colors."""
|
||||||
|
|
||||||
|
DEBUG = "lightblack"
|
||||||
|
INFO = "cyan"
|
||||||
|
SUCCESS = "green"
|
||||||
|
WARNING = "yellow"
|
||||||
|
ERROR = "red"
|
||||||
|
CYAN = "cyan"
|
||||||
|
GREEN = "green"
|
||||||
|
YELLOW = "yellow"
|
||||||
|
MAGENTA = "magenta"
|
||||||
|
DIM_MAGENTA = "dim magenta"
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
"""Automatically convert rich color to string."""
|
||||||
|
return self.value
|
||||||
|
|
||||||
|
|
||||||
class AsyncLoggerBase(ABC):
|
class AsyncLoggerBase(ABC):
|
||||||
@@ -52,6 +74,7 @@ class AsyncLoggerBase(ABC):
|
|||||||
def error_status(self, url: str, error: str, tag: str = "ERROR", url_length: int = 100):
|
def error_status(self, url: str, error: str, tag: str = "ERROR", url_length: int = 100):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class AsyncLogger(AsyncLoggerBase):
|
class AsyncLogger(AsyncLoggerBase):
|
||||||
"""
|
"""
|
||||||
Asynchronous logger with support for colored console output and file logging.
|
Asynchronous logger with support for colored console output and file logging.
|
||||||
@@ -79,17 +102,11 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
}
|
}
|
||||||
|
|
||||||
DEFAULT_COLORS = {
|
DEFAULT_COLORS = {
|
||||||
LogLevel.DEBUG: Fore.LIGHTBLACK_EX,
|
LogLevel.DEBUG: LogColor.DEBUG,
|
||||||
LogLevel.INFO: Fore.CYAN,
|
LogLevel.INFO: LogColor.INFO,
|
||||||
LogLevel.SUCCESS: Fore.GREEN,
|
LogLevel.SUCCESS: LogColor.SUCCESS,
|
||||||
LogLevel.WARNING: Fore.YELLOW,
|
LogLevel.WARNING: LogColor.WARNING,
|
||||||
LogLevel.ERROR: Fore.RED,
|
LogLevel.ERROR: LogColor.ERROR,
|
||||||
LogLevel.CRITICAL: Fore.RED + Style.BRIGHT,
|
|
||||||
LogLevel.ALERT: Fore.RED + Style.BRIGHT,
|
|
||||||
LogLevel.NOTICE: Fore.BLUE,
|
|
||||||
LogLevel.EXCEPTION: Fore.RED + Style.BRIGHT,
|
|
||||||
LogLevel.FATAL: Fore.RED + Style.BRIGHT,
|
|
||||||
LogLevel.DEFAULT: Fore.WHITE,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -98,7 +115,7 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
log_level: LogLevel = LogLevel.DEBUG,
|
log_level: LogLevel = LogLevel.DEBUG,
|
||||||
tag_width: int = 10,
|
tag_width: int = 10,
|
||||||
icons: Optional[Dict[str, str]] = None,
|
icons: Optional[Dict[str, str]] = None,
|
||||||
colors: Optional[Dict[LogLevel, str]] = None,
|
colors: Optional[Dict[LogLevel, LogColor]] = None,
|
||||||
verbose: bool = True,
|
verbose: bool = True,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -112,13 +129,13 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
colors: Custom colors for different log levels
|
colors: Custom colors for different log levels
|
||||||
verbose: Whether to output to console
|
verbose: Whether to output to console
|
||||||
"""
|
"""
|
||||||
init() # Initialize colorama
|
|
||||||
self.log_file = log_file
|
self.log_file = log_file
|
||||||
self.log_level = log_level
|
self.log_level = log_level
|
||||||
self.tag_width = tag_width
|
self.tag_width = tag_width
|
||||||
self.icons = icons or self.DEFAULT_ICONS
|
self.icons = icons or self.DEFAULT_ICONS
|
||||||
self.colors = colors or self.DEFAULT_COLORS
|
self.colors = colors or self.DEFAULT_COLORS
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
|
self.console = Console()
|
||||||
|
|
||||||
# Create log file directory if needed
|
# Create log file directory if needed
|
||||||
if log_file:
|
if log_file:
|
||||||
@@ -143,16 +160,11 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
def _write_to_file(self, message: str):
|
def _write_to_file(self, message: str):
|
||||||
"""Write a message to the log file if configured."""
|
"""Write a message to the log file if configured."""
|
||||||
if self.log_file:
|
if self.log_file:
|
||||||
|
text = Text.from_markup(message)
|
||||||
|
plain_text = text.plain
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||||
with open(self.log_file, "a", encoding="utf-8") as f:
|
with open(self.log_file, "a", encoding="utf-8") as f:
|
||||||
# Strip ANSI color codes for file output
|
f.write(f"[{timestamp}] {plain_text}\n")
|
||||||
clean_message = message.replace(Fore.RESET, "").replace(
|
|
||||||
Style.RESET_ALL, ""
|
|
||||||
)
|
|
||||||
for color in vars(Fore).values():
|
|
||||||
if isinstance(color, str):
|
|
||||||
clean_message = clean_message.replace(color, "")
|
|
||||||
f.write(f"[{timestamp}] {clean_message}\n")
|
|
||||||
|
|
||||||
def _log(
|
def _log(
|
||||||
self,
|
self,
|
||||||
@@ -160,8 +172,9 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
message: str,
|
message: str,
|
||||||
tag: str,
|
tag: str,
|
||||||
params: Optional[Dict[str, Any]] = None,
|
params: Optional[Dict[str, Any]] = None,
|
||||||
colors: Optional[Dict[str, str]] = None,
|
colors: Optional[Dict[str, LogColor]] = None,
|
||||||
base_color: Optional[str] = None,
|
boxes: Optional[List[str]] = None,
|
||||||
|
base_color: Optional[LogColor] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -173,55 +186,44 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
tag: Tag for the message
|
tag: Tag for the message
|
||||||
params: Parameters to format into the message
|
params: Parameters to format into the message
|
||||||
colors: Color overrides for specific parameters
|
colors: Color overrides for specific parameters
|
||||||
|
boxes: Box overrides for specific parameters
|
||||||
base_color: Base color for the entire message
|
base_color: Base color for the entire message
|
||||||
"""
|
"""
|
||||||
if level.value < self.log_level.value:
|
if level.value < self.log_level.value:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Format the message with parameters if provided
|
# avoid conflict with rich formatting
|
||||||
|
parsed_message = message.replace("[", "[[").replace("]", "]]")
|
||||||
if params:
|
if params:
|
||||||
try:
|
# FIXME: If there are formatting strings in floating point format,
|
||||||
# First format the message with raw parameters
|
# this may result in colors and boxes not being applied properly.
|
||||||
formatted_message = message.format(**params)
|
# e.g. with {value:.2f} the value 0.23333 is rendered as 0.23,
|
||||||
|
# but the replace() below searches for the unformatted "0.23333" to wrap it in [color]...[/color], so nothing matches.
|
||||||
|
formatted_message = parsed_message.format(**params)
|
||||||
|
for key, value in params.items():
|
||||||
|
# The value may itself contain `[` or `]`; double them the same way as the message template.
|
||||||
|
value_str = str(value).replace("[", "[[").replace("]", "]]")
|
||||||
|
# check whether a color needs to be applied
|
||||||
|
if colors and key in colors:
|
||||||
|
color_str = f"[{colors[key]}]{value_str}[/{colors[key]}]"
|
||||||
|
formatted_message = formatted_message.replace(value_str, color_str)
|
||||||
|
value_str = color_str
|
||||||
|
|
||||||
# Then apply colors if specified
|
# check whether a box needs to be applied
|
||||||
color_map = {
|
if boxes and key in boxes:
|
||||||
"green": Fore.GREEN,
|
formatted_message = formatted_message.replace(value_str,
|
||||||
"red": Fore.RED,
|
create_box_message(value_str, type=str(level)))
|
||||||
"yellow": Fore.YELLOW,
|
|
||||||
"blue": Fore.BLUE,
|
|
||||||
"cyan": Fore.CYAN,
|
|
||||||
"magenta": Fore.MAGENTA,
|
|
||||||
"white": Fore.WHITE,
|
|
||||||
"black": Fore.BLACK,
|
|
||||||
"reset": Style.RESET_ALL,
|
|
||||||
}
|
|
||||||
if colors:
|
|
||||||
for key, color in colors.items():
|
|
||||||
# Find the formatted value in the message and wrap it with color
|
|
||||||
if color in color_map:
|
|
||||||
color = color_map[color]
|
|
||||||
if key in params:
|
|
||||||
value_str = str(params[key])
|
|
||||||
formatted_message = formatted_message.replace(
|
|
||||||
value_str, f"{color}{value_str}{Style.RESET_ALL}"
|
|
||||||
)
|
|
||||||
|
|
||||||
except KeyError as e:
|
|
||||||
formatted_message = (
|
|
||||||
f"LOGGING ERROR: Missing parameter {e} in message template"
|
|
||||||
)
|
|
||||||
level = LogLevel.ERROR
|
|
||||||
else:
|
else:
|
||||||
formatted_message = message
|
formatted_message = parsed_message
|
||||||
|
|
||||||
# Construct the full log line
|
# Construct the full log line
|
||||||
color = base_color or self.colors[level]
|
color: LogColor = base_color or self.colors[level]
|
||||||
log_line = f"{color}{self._format_tag(tag)} {self._get_icon(tag)} {formatted_message}{Style.RESET_ALL}"
|
log_line = f"[{color}]{self._format_tag(tag)} {self._get_icon(tag)} {formatted_message} [/{color}]"
|
||||||
|
|
||||||
# Output to console if verbose
|
# Output to console if verbose
|
||||||
if self.verbose or kwargs.get("force_verbose", False):
|
if self.verbose or kwargs.get("force_verbose", False):
|
||||||
print(log_line)
|
self.console.print(log_line)
|
||||||
|
|
||||||
# Write to file if configured
|
# Write to file if configured
|
||||||
self._write_to_file(log_line)
|
self._write_to_file(log_line)
|
||||||
@@ -292,8 +294,8 @@ class AsyncLogger(AsyncLoggerBase):
|
|||||||
"timing": timing,
|
"timing": timing,
|
||||||
},
|
},
|
||||||
colors={
|
colors={
|
||||||
"status": Fore.GREEN if success else Fore.RED,
|
"status": LogColor.SUCCESS if success else LogColor.ERROR,
|
||||||
"timing": Fore.YELLOW,
|
"timing": LogColor.WARNING,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ from .__version__ import __version__ as crawl4ai_version
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from colorama import Fore
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
import json
|
import json
|
||||||
@@ -44,7 +43,6 @@ from .utils import (
|
|||||||
sanitize_input_encode,
|
sanitize_input_encode,
|
||||||
InvalidCSSSelectorError,
|
InvalidCSSSelectorError,
|
||||||
fast_format_html,
|
fast_format_html,
|
||||||
create_box_message,
|
|
||||||
get_error_context,
|
get_error_context,
|
||||||
RobotsParser,
|
RobotsParser,
|
||||||
preprocess_html_for_schema,
|
preprocess_html_for_schema,
|
||||||
@@ -419,7 +417,7 @@ class AsyncWebCrawler:
|
|||||||
|
|
||||||
self.logger.error_status(
|
self.logger.error_status(
|
||||||
url=url,
|
url=url,
|
||||||
error=create_box_message(error_message, type="error"),
|
error=error_message,
|
||||||
tag="ERROR",
|
tag="ERROR",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -15,12 +15,12 @@ import shutil
|
|||||||
import json
|
import json
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
from typing import List, Dict, Optional, Any, Tuple
|
from typing import List, Dict, Optional, Any
|
||||||
from colorama import Fore, Style, init
|
from rich.console import Console
|
||||||
|
|
||||||
from .async_configs import BrowserConfig
|
from .async_configs import BrowserConfig
|
||||||
from .browser_manager import ManagedBrowser
|
from .browser_manager import ManagedBrowser
|
||||||
from .async_logger import AsyncLogger, AsyncLoggerBase
|
from .async_logger import AsyncLogger, AsyncLoggerBase, LogColor
|
||||||
from .utils import get_home_folder
|
from .utils import get_home_folder
|
||||||
|
|
||||||
|
|
||||||
@@ -45,8 +45,8 @@ class BrowserProfiler:
|
|||||||
logger (AsyncLoggerBase, optional): Logger for outputting messages.
|
logger (AsyncLoggerBase, optional): Logger for outputting messages.
|
||||||
If None, a default AsyncLogger will be created.
|
If None, a default AsyncLogger will be created.
|
||||||
"""
|
"""
|
||||||
# Initialize colorama for colorful terminal output
|
# Initialize rich console for colorful input prompts
|
||||||
init()
|
self.console = Console()
|
||||||
|
|
||||||
# Create a logger if not provided
|
# Create a logger if not provided
|
||||||
if logger is None:
|
if logger is None:
|
||||||
@@ -127,18 +127,18 @@ class BrowserProfiler:
|
|||||||
profile_path = os.path.join(self.profiles_dir, profile_name)
|
profile_path = os.path.join(self.profiles_dir, profile_name)
|
||||||
os.makedirs(profile_path, exist_ok=True)
|
os.makedirs(profile_path, exist_ok=True)
|
||||||
|
|
||||||
# Print instructions for the user with colorama formatting
|
# Print instructions for the user with rich formatting
|
||||||
border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
|
border = "{'='*80}"
|
||||||
self.logger.info(f"\n{border}", tag="PROFILE")
|
self.logger.info("{border}", tag="PROFILE", params={"border": f"\n{border}"}, colors={"border": LogColor.CYAN})
|
||||||
self.logger.info(f"Creating browser profile: {Fore.GREEN}{profile_name}{Style.RESET_ALL}", tag="PROFILE")
|
self.logger.info("Creating browser profile: {profile_name}", tag="PROFILE", params={"profile_name": profile_name}, colors={"profile_name": LogColor.GREEN})
|
||||||
self.logger.info(f"Profile directory: {Fore.YELLOW}{profile_path}{Style.RESET_ALL}", tag="PROFILE")
|
self.logger.info("Profile directory: {profile_path}", tag="PROFILE", params={"profile_path": profile_path}, colors={"profile_path": LogColor.YELLOW})
|
||||||
|
|
||||||
self.logger.info("\nInstructions:", tag="PROFILE")
|
self.logger.info("\nInstructions:", tag="PROFILE")
|
||||||
self.logger.info("1. A browser window will open for you to set up your profile.", tag="PROFILE")
|
self.logger.info("1. A browser window will open for you to set up your profile.", tag="PROFILE")
|
||||||
self.logger.info(f"2. {Fore.CYAN}Log in to websites{Style.RESET_ALL}, configure settings, etc. as needed.", tag="PROFILE")
|
self.logger.info("{segment}, configure settings, etc. as needed.", tag="PROFILE", params={"segment": "2. Log in to websites"}, colors={"segment": LogColor.CYAN})
|
||||||
self.logger.info(f"3. When you're done, {Fore.YELLOW}press 'q' in this terminal{Style.RESET_ALL} to close the browser.", tag="PROFILE")
|
self.logger.info("3. When you're done, {segment} to close the browser.", tag="PROFILE", params={"segment": "press 'q' in this terminal"}, colors={"segment": LogColor.YELLOW})
|
||||||
self.logger.info("4. The profile will be saved and ready to use with Crawl4AI.", tag="PROFILE")
|
self.logger.info("4. The profile will be saved and ready to use with Crawl4AI.", tag="PROFILE")
|
||||||
self.logger.info(f"{border}\n", tag="PROFILE")
|
self.logger.info("{border}", tag="PROFILE", params={"border": f"{border}\n"}, colors={"border": LogColor.CYAN})
|
||||||
|
|
||||||
browser_config.headless = False
|
browser_config.headless = False
|
||||||
browser_config.user_data_dir = profile_path
|
browser_config.user_data_dir = profile_path
|
||||||
@@ -185,7 +185,7 @@ class BrowserProfiler:
|
|||||||
import select
|
import select
|
||||||
|
|
||||||
# First output the prompt
|
# First output the prompt
|
||||||
self.logger.info(f"{Fore.CYAN}Press '{Fore.WHITE}q{Fore.CYAN}' when you've finished using the browser...{Style.RESET_ALL}", tag="PROFILE")
|
self.logger.info("Press 'q' when you've finished using the browser...", tag="PROFILE")
|
||||||
|
|
||||||
# Save original terminal settings
|
# Save original terminal settings
|
||||||
fd = sys.stdin.fileno()
|
fd = sys.stdin.fileno()
|
||||||
@@ -201,7 +201,7 @@ class BrowserProfiler:
|
|||||||
if readable:
|
if readable:
|
||||||
key = sys.stdin.read(1)
|
key = sys.stdin.read(1)
|
||||||
if key.lower() == 'q':
|
if key.lower() == 'q':
|
||||||
self.logger.info(f"{Fore.GREEN}Closing browser and saving profile...{Style.RESET_ALL}", tag="PROFILE")
|
self.logger.info("Closing browser and saving profile...", tag="PROFILE", base_color=LogColor.GREEN)
|
||||||
user_done_event.set()
|
user_done_event.set()
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -227,7 +227,7 @@ class BrowserProfiler:
|
|||||||
self.logger.error("Failed to start browser process.", tag="PROFILE")
|
self.logger.error("Failed to start browser process.", tag="PROFILE")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
self.logger.info(f"Browser launched. {Fore.CYAN}Waiting for you to finish...{Style.RESET_ALL}", tag="PROFILE")
|
self.logger.info("Browser launched. Waiting for you to finish...", tag="PROFILE")
|
||||||
|
|
||||||
# Start listening for keyboard input
|
# Start listening for keyboard input
|
||||||
listener_task = asyncio.create_task(listen_for_quit_command())
|
listener_task = asyncio.create_task(listen_for_quit_command())
|
||||||
@@ -249,10 +249,10 @@ class BrowserProfiler:
|
|||||||
self.logger.info("Terminating browser process...", tag="PROFILE")
|
self.logger.info("Terminating browser process...", tag="PROFILE")
|
||||||
await managed_browser.cleanup()
|
await managed_browser.cleanup()
|
||||||
|
|
||||||
self.logger.success(f"Browser closed. Profile saved at: {Fore.GREEN}{profile_path}{Style.RESET_ALL}", tag="PROFILE")
|
self.logger.success(f"Browser closed. Profile saved at: {profile_path}", tag="PROFILE")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error creating profile: {str(e)}", tag="PROFILE")
|
self.logger.error(f"Error creating profile: {e!s}", tag="PROFILE")
|
||||||
await managed_browser.cleanup()
|
await managed_browser.cleanup()
|
||||||
return None
|
return None
|
||||||
finally:
|
finally:
|
||||||
@@ -444,25 +444,27 @@ class BrowserProfiler:
|
|||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
while True:
|
while True:
|
||||||
self.logger.info(f"\n{Fore.CYAN}Profile Management Options:{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("\nProfile Management Options:", tag="MENU")
|
||||||
self.logger.info(f"1. {Fore.GREEN}Create a new profile{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("1. Create a new profile", tag="MENU", base_color=LogColor.GREEN)
|
||||||
self.logger.info(f"2. {Fore.YELLOW}List available profiles{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("2. List available profiles", tag="MENU", base_color=LogColor.YELLOW)
|
||||||
self.logger.info(f"3. {Fore.RED}Delete a profile{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("3. Delete a profile", tag="MENU", base_color=LogColor.RED)
|
||||||
|
|
||||||
# Only show crawl option if callback provided
|
# Only show crawl option if callback provided
|
||||||
if crawl_callback:
|
if crawl_callback:
|
||||||
self.logger.info(f"4. {Fore.CYAN}Use a profile to crawl a website{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("4. Use a profile to crawl a website", tag="MENU", base_color=LogColor.CYAN)
|
||||||
self.logger.info(f"5. {Fore.MAGENTA}Exit{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("5. Exit", tag="MENU", base_color=LogColor.MAGENTA)
|
||||||
exit_option = "5"
|
exit_option = "5"
|
||||||
else:
|
else:
|
||||||
self.logger.info(f"4. {Fore.MAGENTA}Exit{Style.RESET_ALL}", tag="MENU")
|
self.logger.info("4. Exit", tag="MENU", base_color=LogColor.MAGENTA)
|
||||||
exit_option = "4"
|
exit_option = "4"
|
||||||
|
|
||||||
choice = input(f"\n{Fore.CYAN}Enter your choice (1-{exit_option}): {Style.RESET_ALL}")
|
self.logger.print(f"\n[cyan]Enter your choice (1-{exit_option}): [/cyan]", end="")
|
||||||
|
choice = input()
|
||||||
|
|
||||||
if choice == "1":
|
if choice == "1":
|
||||||
# Create new profile
|
# Create new profile
|
||||||
name = input(f"{Fore.GREEN}Enter a name for the new profile (or press Enter for auto-generated name): {Style.RESET_ALL}")
|
self.console.print("[green]Enter a name for the new profile (or press Enter for auto-generated name): [/green]", end="")
|
||||||
|
name = input()
|
||||||
await self.create_profile(name or None)
|
await self.create_profile(name or None)
|
||||||
|
|
||||||
elif choice == "2":
|
elif choice == "2":
|
||||||
@@ -476,8 +478,8 @@ class BrowserProfiler:
|
|||||||
# Print profile information with colorama formatting
|
# Print profile information with rich formatting
|
||||||
self.logger.info("\nAvailable profiles:", tag="PROFILES")
|
self.logger.info("\nAvailable profiles:", tag="PROFILES")
|
||||||
for i, profile in enumerate(profiles):
|
for i, profile in enumerate(profiles):
|
||||||
self.logger.info(f"[{i+1}] {Fore.CYAN}{profile['name']}{Style.RESET_ALL}", tag="PROFILES")
|
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
||||||
self.logger.info(f" Path: {Fore.YELLOW}{profile['path']}{Style.RESET_ALL}", tag="PROFILES")
|
self.logger.info(f" Path: {profile['path']}", tag="PROFILES", base_color=LogColor.YELLOW)
|
||||||
self.logger.info(f" Created: {profile['created'].strftime('%Y-%m-%d %H:%M:%S')}", tag="PROFILES")
|
self.logger.info(f" Created: {profile['created'].strftime('%Y-%m-%d %H:%M:%S')}", tag="PROFILES")
|
||||||
self.logger.info(f" Browser type: {profile['type']}", tag="PROFILES")
|
self.logger.info(f" Browser type: {profile['type']}", tag="PROFILES")
|
||||||
self.logger.info("", tag="PROFILES") # Empty line for spacing
|
self.logger.info("", tag="PROFILES") # Empty line for spacing
|
||||||
@@ -490,12 +492,13 @@ class BrowserProfiler:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Display numbered list
|
# Display numbered list
|
||||||
self.logger.info(f"\n{Fore.YELLOW}Available profiles:{Style.RESET_ALL}", tag="PROFILES")
|
self.logger.info("\nAvailable profiles:", tag="PROFILES", base_color=LogColor.YELLOW)
|
||||||
for i, profile in enumerate(profiles):
|
for i, profile in enumerate(profiles):
|
||||||
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
||||||
|
|
||||||
# Get profile to delete
|
# Get profile to delete
|
||||||
profile_idx = input(f"{Fore.RED}Enter the number of the profile to delete (or 'c' to cancel): {Style.RESET_ALL}")
|
self.console.print("[red]Enter the number of the profile to delete (or 'c' to cancel): [/red]", end="")
|
||||||
|
profile_idx = input()
|
||||||
if profile_idx.lower() == 'c':
|
if profile_idx.lower() == 'c':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -503,17 +506,18 @@ class BrowserProfiler:
|
|||||||
idx = int(profile_idx) - 1
|
idx = int(profile_idx) - 1
|
||||||
if 0 <= idx < len(profiles):
|
if 0 <= idx < len(profiles):
|
||||||
profile_name = profiles[idx]["name"]
|
profile_name = profiles[idx]["name"]
|
||||||
self.logger.info(f"Deleting profile: {Fore.YELLOW}{profile_name}{Style.RESET_ALL}", tag="PROFILES")
|
self.logger.info(f"Deleting profile: [yellow]{profile_name}[/yellow]", tag="PROFILES")
|
||||||
|
|
||||||
# Confirm deletion
|
# Confirm deletion
|
||||||
confirm = input(f"{Fore.RED}Are you sure you want to delete this profile? (y/n): {Style.RESET_ALL}")
|
self.console.print("[red]Are you sure you want to delete this profile? (y/n): [/red]", end="")
|
||||||
|
confirm = input()
|
||||||
if confirm.lower() == 'y':
|
if confirm.lower() == 'y':
|
||||||
success = self.delete_profile(profiles[idx]["path"])
|
success = self.delete_profile(profiles[idx]["path"])
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
self.logger.success(f"Profile {Fore.GREEN}{profile_name}{Style.RESET_ALL} deleted successfully", tag="PROFILES")
|
self.logger.success(f"Profile {profile_name} deleted successfully", tag="PROFILES")
|
||||||
else:
|
else:
|
||||||
self.logger.error(f"Failed to delete profile {Fore.RED}{profile_name}{Style.RESET_ALL}", tag="PROFILES")
|
self.logger.error(f"Failed to delete profile {profile_name}", tag="PROFILES")
|
||||||
else:
|
else:
|
||||||
self.logger.error("Invalid profile number", tag="PROFILES")
|
self.logger.error("Invalid profile number", tag="PROFILES")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -527,12 +531,13 @@ class BrowserProfiler:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Display numbered list
|
# Display numbered list
|
||||||
self.logger.info(f"\n{Fore.YELLOW}Available profiles:{Style.RESET_ALL}", tag="PROFILES")
|
self.logger.info("\nAvailable profiles:", tag="PROFILES", base_color=LogColor.YELLOW)
|
||||||
for i, profile in enumerate(profiles):
|
for i, profile in enumerate(profiles):
|
||||||
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")
|
||||||
|
|
||||||
# Get profile to use
|
# Get profile to use
|
||||||
profile_idx = input(f"{Fore.CYAN}Enter the number of the profile to use (or 'c' to cancel): {Style.RESET_ALL}")
|
self.console.print("[cyan]Enter the number of the profile to use (or 'c' to cancel): [/cyan]", end="")
|
||||||
|
profile_idx = input()
|
||||||
if profile_idx.lower() == 'c':
|
if profile_idx.lower() == 'c':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -540,7 +545,8 @@ class BrowserProfiler:
|
|||||||
idx = int(profile_idx) - 1
|
idx = int(profile_idx) - 1
|
||||||
if 0 <= idx < len(profiles):
|
if 0 <= idx < len(profiles):
|
||||||
profile_path = profiles[idx]["path"]
|
profile_path = profiles[idx]["path"]
|
||||||
url = input(f"{Fore.CYAN}Enter the URL to crawl: {Style.RESET_ALL}")
|
self.console.print("[cyan]Enter the URL to crawl: [/cyan]", end="")
|
||||||
|
url = input()
|
||||||
if url:
|
if url:
|
||||||
# Call the provided crawl callback
|
# Call the provided crawl callback
|
||||||
await crawl_callback(profile_path, url)
|
await crawl_callback(profile_path, url)
|
||||||
@@ -603,11 +609,11 @@ class BrowserProfiler:
|
|||||||
# Print initial information
|
# Print initial information
|
||||||
border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
|
border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
|
||||||
self.logger.info(f"\n{border}", tag="CDP")
|
self.logger.info(f"\n{border}", tag="CDP")
|
||||||
self.logger.info(f"Launching standalone browser with CDP debugging", tag="CDP")
|
self.logger.info("Launching standalone browser with CDP debugging", tag="CDP")
|
||||||
self.logger.info(f"Browser type: {Fore.GREEN}{browser_type}{Style.RESET_ALL}", tag="CDP")
|
self.logger.info("Browser type: {browser_type}", tag="CDP", params={"browser_type": browser_type}, colors={"browser_type": LogColor.CYAN})
|
||||||
self.logger.info(f"Profile path: {Fore.YELLOW}{profile_path}{Style.RESET_ALL}", tag="CDP")
|
self.logger.info("Profile path: {profile_path}", tag="CDP", params={"profile_path": profile_path}, colors={"profile_path": LogColor.YELLOW})
|
||||||
self.logger.info(f"Debugging port: {Fore.CYAN}{debugging_port}{Style.RESET_ALL}", tag="CDP")
|
self.logger.info(f"Debugging port: {debugging_port}", tag="CDP")
|
||||||
self.logger.info(f"Headless mode: {Fore.CYAN}{headless}{Style.RESET_ALL}", tag="CDP")
|
self.logger.info(f"Headless mode: {headless}", tag="CDP")
|
||||||
|
|
||||||
# Create managed browser instance
|
# Create managed browser instance
|
||||||
managed_browser = ManagedBrowser(
|
managed_browser = ManagedBrowser(
|
||||||
@@ -650,7 +656,7 @@ class BrowserProfiler:
|
|||||||
import select
|
import select
|
||||||
|
|
||||||
# First output the prompt
|
# First output the prompt
|
||||||
self.logger.info(f"{Fore.CYAN}Press '{Fore.WHITE}q{Fore.CYAN}' to stop the browser and exit...{Style.RESET_ALL}", tag="CDP")
|
self.logger.info("Press 'q' to stop the browser and exit...", tag="CDP")
|
||||||
|
|
||||||
# Save original terminal settings
|
# Save original terminal settings
|
||||||
fd = sys.stdin.fileno()
|
fd = sys.stdin.fileno()
|
||||||
@@ -666,7 +672,7 @@ class BrowserProfiler:
|
|||||||
if readable:
|
if readable:
|
||||||
key = sys.stdin.read(1)
|
key = sys.stdin.read(1)
|
||||||
if key.lower() == 'q':
|
if key.lower() == 'q':
|
||||||
self.logger.info(f"{Fore.GREEN}Closing browser...{Style.RESET_ALL}", tag="CDP")
|
self.logger.info("Closing browser...", tag="CDP")
|
||||||
user_done_event.set()
|
user_done_event.set()
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -720,20 +726,20 @@ class BrowserProfiler:
|
|||||||
self.logger.error("Failed to start browser process.", tag="CDP")
|
self.logger.error("Failed to start browser process.", tag="CDP")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
self.logger.info(f"Browser launched successfully. Retrieving CDP information...", tag="CDP")
|
self.logger.info("Browser launched successfully. Retrieving CDP information...", tag="CDP")
|
||||||
|
|
||||||
# Get CDP URL and JSON config
|
# Get CDP URL and JSON config
|
||||||
cdp_url, config_json = await get_cdp_json(debugging_port)
|
cdp_url, config_json = await get_cdp_json(debugging_port)
|
||||||
|
|
||||||
if cdp_url:
|
if cdp_url:
|
||||||
self.logger.success(f"CDP URL: {Fore.GREEN}{cdp_url}{Style.RESET_ALL}", tag="CDP")
|
self.logger.success(f"CDP URL: {cdp_url}", tag="CDP")
|
||||||
|
|
||||||
if config_json:
|
if config_json:
|
||||||
# Display relevant CDP information
|
# Display relevant CDP information
|
||||||
self.logger.info(f"Browser: {Fore.CYAN}{config_json.get('Browser', 'Unknown')}{Style.RESET_ALL}", tag="CDP")
|
self.logger.info(f"Browser: {config_json.get('Browser', 'Unknown')}", tag="CDP", colors={"Browser": LogColor.CYAN})
|
||||||
self.logger.info(f"Protocol Version: {config_json.get('Protocol-Version', 'Unknown')}", tag="CDP")
|
self.logger.info(f"Protocol Version: {config_json.get('Protocol-Version', 'Unknown')}", tag="CDP", colors={"Protocol-Version": LogColor.CYAN})
|
||||||
if 'webSocketDebuggerUrl' in config_json:
|
if 'webSocketDebuggerUrl' in config_json:
|
||||||
self.logger.info(f"WebSocket URL: {Fore.GREEN}{config_json['webSocketDebuggerUrl']}{Style.RESET_ALL}", tag="CDP")
|
self.logger.info("WebSocket URL: {webSocketDebuggerUrl}", tag="CDP", params={"webSocketDebuggerUrl": config_json['webSocketDebuggerUrl']}, colors={"webSocketDebuggerUrl": LogColor.GREEN})
|
||||||
else:
|
else:
|
||||||
self.logger.warning("Could not retrieve CDP configuration JSON", tag="CDP")
|
self.logger.warning("Could not retrieve CDP configuration JSON", tag="CDP")
|
||||||
else:
|
else:
|
||||||
@@ -761,7 +767,7 @@ class BrowserProfiler:
|
|||||||
self.logger.info("Terminating browser process...", tag="CDP")
|
self.logger.info("Terminating browser process...", tag="CDP")
|
||||||
await managed_browser.cleanup()
|
await managed_browser.cleanup()
|
||||||
|
|
||||||
self.logger.success(f"Browser closed.", tag="CDP")
|
self.logger.success("Browser closed.", tag="CDP")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error launching standalone browser: {str(e)}", tag="CDP")
|
self.logger.error(f"Error launching standalone browser: {str(e)}", tag="CDP")
|
||||||
|
|||||||
@@ -27,8 +27,7 @@ import json
|
|||||||
import hashlib
|
import hashlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from .async_logger import AsyncLogger, LogLevel
|
from .async_logger import AsyncLogger, LogLevel, LogColor
|
||||||
from colorama import Fore, Style
|
|
||||||
|
|
||||||
|
|
||||||
class RelevantContentFilter(ABC):
|
class RelevantContentFilter(ABC):
|
||||||
@@ -846,8 +845,7 @@ class LLMContentFilter(RelevantContentFilter):
|
|||||||
},
|
},
|
||||||
colors={
|
colors={
|
||||||
**AsyncLogger.DEFAULT_COLORS,
|
**AsyncLogger.DEFAULT_COLORS,
|
||||||
LogLevel.INFO: Fore.MAGENTA
|
LogLevel.INFO: LogColor.DIM_MAGENTA # Dimmed purple for LLM ops
|
||||||
+ Style.DIM, # Dimmed purple for LLM ops
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -892,7 +890,7 @@ class LLMContentFilter(RelevantContentFilter):
|
|||||||
"Starting LLM markdown content filtering process",
|
"Starting LLM markdown content filtering process",
|
||||||
tag="LLM",
|
tag="LLM",
|
||||||
params={"provider": self.llm_config.provider},
|
params={"provider": self.llm_config.provider},
|
||||||
colors={"provider": Fore.CYAN},
|
colors={"provider": LogColor.CYAN},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Cache handling
|
# Cache handling
|
||||||
@@ -929,7 +927,7 @@ class LLMContentFilter(RelevantContentFilter):
|
|||||||
"LLM markdown: Split content into {chunk_count} chunks",
|
"LLM markdown: Split content into {chunk_count} chunks",
|
||||||
tag="CHUNK",
|
tag="CHUNK",
|
||||||
params={"chunk_count": len(html_chunks)},
|
params={"chunk_count": len(html_chunks)},
|
||||||
colors={"chunk_count": Fore.YELLOW},
|
colors={"chunk_count": LogColor.YELLOW},
|
||||||
)
|
)
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
@@ -1038,7 +1036,7 @@ class LLMContentFilter(RelevantContentFilter):
|
|||||||
"LLM markdown: Completed processing in {time:.2f}s",
|
"LLM markdown: Completed processing in {time:.2f}s",
|
||||||
tag="LLM",
|
tag="LLM",
|
||||||
params={"time": end_time - start_time},
|
params={"time": end_time - start_time},
|
||||||
colors={"time": Fore.YELLOW},
|
colors={"time": LogColor.YELLOW},
|
||||||
)
|
)
|
||||||
|
|
||||||
result = ordered_results if ordered_results else []
|
result = ordered_results if ordered_results else []
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ from urllib.parse import urljoin
|
|||||||
import requests
|
import requests
|
||||||
from requests.exceptions import InvalidSchema
|
from requests.exceptions import InvalidSchema
|
||||||
import xxhash
|
import xxhash
|
||||||
from colorama import Fore, Style, init
|
|
||||||
import textwrap
|
import textwrap
|
||||||
import cProfile
|
import cProfile
|
||||||
import pstats
|
import pstats
|
||||||
@@ -441,14 +440,13 @@ def create_box_message(
|
|||||||
str: A formatted string containing the styled message box.
|
str: A formatted string containing the styled message box.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
init()
|
|
||||||
|
|
||||||
# Define border and text colors for different types
|
# Define border and text colors for different types
|
||||||
styles = {
|
styles = {
|
||||||
"warning": (Fore.YELLOW, Fore.LIGHTYELLOW_EX, "⚠"),
|
"warning": ("yellow", "bright_yellow", "⚠"),
|
||||||
"info": (Fore.BLUE, Fore.LIGHTBLUE_EX, "ℹ"),
|
"info": ("blue", "bright_blue", "ℹ"),
|
||||||
"success": (Fore.GREEN, Fore.LIGHTGREEN_EX, "✓"),
|
"debug": ("lightblack", "bright_black", "⋯"),
|
||||||
"error": (Fore.RED, Fore.LIGHTRED_EX, "×"),
|
"success": ("green", "bright_green", "✓"),
|
||||||
|
"error": ("red", "bright_red", "×"),
|
||||||
}
|
}
|
||||||
|
|
||||||
border_color, text_color, prefix = styles.get(type.lower(), styles["info"])
|
border_color, text_color, prefix = styles.get(type.lower(), styles["info"])
|
||||||
@@ -480,12 +478,12 @@ def create_box_message(
|
|||||||
# Create the box with colored borders and lighter text
|
# Create the box with colored borders and lighter text
|
||||||
horizontal_line = h_line * (width - 1)
|
horizontal_line = h_line * (width - 1)
|
||||||
box = [
|
box = [
|
||||||
f"{border_color}{tl}{horizontal_line}{tr}",
|
f"[{border_color}]{tl}{horizontal_line}{tr}[/{border_color}]",
|
||||||
*[
|
*[
|
||||||
f"{border_color}{v_line}{text_color} {line:<{width-2}}{border_color}{v_line}"
|
f"[{border_color}]{v_line}[{text_color}] {line:<{width-2}}[/{text_color}][{border_color}]{v_line}[/{border_color}]"
|
||||||
for line in formatted_lines
|
for line in formatted_lines
|
||||||
],
|
],
|
||||||
f"{border_color}{bl}{horizontal_line}{br}{Style.RESET_ALL}",
|
f"[{border_color}]{bl}{horizontal_line}{br}[/{border_color}]",
|
||||||
]
|
]
|
||||||
|
|
||||||
result = "\n".join(box)
|
result = "\n".join(box)
|
||||||
@@ -2778,4 +2776,3 @@ def preprocess_html_for_schema(html_content, text_threshold=100, attr_value_thre
|
|||||||
# Fallback for parsing errors
|
# Fallback for parsing errors
|
||||||
return html_content[:max_size] if len(html_content) > max_size else html_content
|
return html_content[:max_size] if len(html_content) > max_size else html_content
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user