diff --git a/crawl4ai/async_dispatcher.py b/crawl4ai/async_dispatcher.py index 69d276fb..b587d011 100644 --- a/crawl4ai/async_dispatcher.py +++ b/crawl4ai/async_dispatcher.py @@ -13,7 +13,7 @@ from rich.live import Live from rich.table import Table from rich.console import Console from rich import box -from datetime import timedelta +from datetime import timedelta, datetime from collections.abc import AsyncGenerator import time import psutil diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index e0e49d99..3b708421 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -1064,7 +1064,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy): api_token (str): Legacy Parameter. API token for LLM provider llm_config (LLMConfig): LLM configuration object prompt (str, optional): Custom prompt template to use - **kwargs: Additional args passed to perform_completion_with_backoff + **kwargs: Additional args passed to LLM processor Returns: dict: Generated schema following the JsonElementExtractionStrategy format @@ -1130,7 +1130,8 @@ In this scenario, use your best judgment to generate the schema. Try to maximize prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]), json_response = True, api_token=llm_config.api_token, - **kwargs + base_url=llm_config.base_url, + extra_args=kwargs ) # Extract and return schema diff --git a/crawl4ai/models.py b/crawl4ai/models.py index ef9efc06..c1caff94 100644 --- a/crawl4ai/models.py +++ b/crawl4ai/models.py @@ -37,13 +37,33 @@ class CrawlStatus(Enum): FAILED = "FAILED" +# @dataclass +# class CrawlStats: +# task_id: str +# url: str +# status: CrawlStatus +# start_time: Optional[datetime] = None +# end_time: Optional[datetime] = None +# memory_usage: float = 0.0 +# peak_memory: float = 0.0 +# error_message: str = "" + +# @property +# def duration(self) -> str: +# if not self.start_time: +# return "0:00" +# end = self.end_time or datetime.now() +# duration = end - self.start_time +# return str(timedelta(seconds=int(duration.total_seconds()))) + + @dataclass class CrawlStats: task_id: str url: str status: CrawlStatus - start_time: Optional[datetime] = None - end_time: Optional[datetime] = None + start_time: Optional[Union[datetime, float]] = None + end_time: Optional[Union[datetime, float]] = None memory_usage: float = 0.0 peak_memory: float = 0.0 error_message: str = "" @@ -52,11 +72,21 @@ class CrawlStats: def duration(self) -> str: if not self.start_time: return "0:00" + + # Convert start_time to datetime if it's a float + start = self.start_time + if isinstance(start, float): + start = datetime.fromtimestamp(start) + + # Get end time or use current time end = self.end_time or datetime.now() - duration = end - self.start_time + # Convert end_time to datetime if it's a float + if isinstance(end, float): + end = datetime.fromtimestamp(end) + + duration = end - start return str(timedelta(seconds=int(duration.total_seconds()))) - class DisplayMode(Enum): DETAILED = "DETAILED" AGGREGATED = "AGGREGATED"