From 29f7915b795418bbc8dec9218fa8e9acae167885 Mon Sep 17 00:00:00 2001 From: UncleCode Date: Thu, 6 Mar 2025 20:30:57 +0800 Subject: [PATCH] fix(models): support float timestamps in CrawlStats Modify CrawlStats class to handle both datetime and float timestamp formats for start_time and end_time fields. This change improves compatibility with different time formats while maintaining existing functionality. Other minor changes: - Add datetime import in async_dispatcher - Update JsonElementExtractionStrategy kwargs handling No breaking changes. --- crawl4ai/async_dispatcher.py | 2 +- crawl4ai/extraction_strategy.py | 5 +++-- crawl4ai/models.py | 38 +++++++++++++++++++++++++++++---- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/crawl4ai/async_dispatcher.py b/crawl4ai/async_dispatcher.py index 69d276fb..b587d011 100644 --- a/crawl4ai/async_dispatcher.py +++ b/crawl4ai/async_dispatcher.py @@ -13,7 +13,7 @@ from rich.live import Live from rich.table import Table from rich.console import Console from rich import box -from datetime import timedelta +from datetime import timedelta, datetime from collections.abc import AsyncGenerator import time import psutil diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index e0e49d99..3b708421 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -1064,7 +1064,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy): api_token (str): Legacy Parameter. API token for LLM provider llm_config (LLMConfig): LLM configuration object prompt (str, optional): Custom prompt template to use - **kwargs: Additional args passed to perform_completion_with_backoff + **kwargs: Additional args passed to LLM processor Returns: dict: Generated schema following the JsonElementExtractionStrategy format @@ -1130,7 +1130,8 @@ In this scenario, use your best judgment to generate the schema. Try to maximize prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]), json_response = True, api_token=llm_config.api_token, - **kwargs + base_url=llm_config.base_url, + extra_args=kwargs ) # Extract and return schema diff --git a/crawl4ai/models.py b/crawl4ai/models.py index ef9efc06..c1caff94 100644 --- a/crawl4ai/models.py +++ b/crawl4ai/models.py @@ -37,13 +37,33 @@ class CrawlStatus(Enum): FAILED = "FAILED" +# @dataclass +# class CrawlStats: +# task_id: str +# url: str +# status: CrawlStatus +# start_time: Optional[datetime] = None +# end_time: Optional[datetime] = None +# memory_usage: float = 0.0 +# peak_memory: float = 0.0 +# error_message: str = "" + +# @property +# def duration(self) -> str: +# if not self.start_time: +# return "0:00" +# end = self.end_time or datetime.now() +# duration = end - self.start_time +# return str(timedelta(seconds=int(duration.total_seconds()))) + + @dataclass class CrawlStats: task_id: str url: str status: CrawlStatus - start_time: Optional[datetime] = None - end_time: Optional[datetime] = None + start_time: Optional[Union[datetime, float]] = None + end_time: Optional[Union[datetime, float]] = None memory_usage: float = 0.0 peak_memory: float = 0.0 error_message: str = "" @@ -52,11 +72,21 @@ class CrawlStats: def duration(self) -> str: if not self.start_time: return "0:00" + + # Convert start_time to datetime if it's a float + start = self.start_time + if isinstance(start, float): + start = datetime.fromtimestamp(start) + + # Get end time or use current time end = self.end_time or datetime.now() - duration = end - self.start_time + # Convert end_time to datetime if it's a float + if isinstance(end, float): + end = datetime.fromtimestamp(end) + + duration = end - start return str(timedelta(seconds=int(duration.total_seconds()))) - class DisplayMode(Enum): DETAILED = "DETAILED" AGGREGATED = "AGGREGATED"