fix(models): support float timestamps in CrawlStats

Modify the CrawlStats class so its start_time and end_time fields accept either datetime objects or float (epoch-seconds) timestamps. Float values are converted to datetime inside the duration property, so existing datetime-based callers are unaffected.

Other minor changes:
- Add the missing datetime import in async_dispatcher
- Update JsonElementExtractionStrategy kwargs handling

No breaking changes.
This commit is contained in:
UncleCode
2025-03-06 20:30:57 +08:00
parent 2327db6fdc
commit 29f7915b79
3 changed files with 38 additions and 7 deletions

View File

@@ -13,7 +13,7 @@ from rich.live import Live
from rich.table import Table from rich.table import Table
from rich.console import Console from rich.console import Console
from rich import box from rich import box
from datetime import timedelta from datetime import timedelta, datetime
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
import time import time
import psutil import psutil

View File

@@ -1064,7 +1064,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
api_token (str): Legacy Parameter. API token for LLM provider api_token (str): Legacy Parameter. API token for LLM provider
llm_config (LLMConfig): LLM configuration object llm_config (LLMConfig): LLM configuration object
prompt (str, optional): Custom prompt template to use prompt (str, optional): Custom prompt template to use
**kwargs: Additional args passed to perform_completion_with_backoff **kwargs: Additional args passed to LLM processor
Returns: Returns:
dict: Generated schema following the JsonElementExtractionStrategy format dict: Generated schema following the JsonElementExtractionStrategy format
@@ -1130,7 +1130,8 @@ In this scenario, use your best judgment to generate the schema. Try to maximize
prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]), prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]),
json_response = True, json_response = True,
api_token=llm_config.api_token, api_token=llm_config.api_token,
**kwargs base_url=llm_config.base_url,
extra_args=kwargs
) )
# Extract and return schema # Extract and return schema

View File

@@ -37,13 +37,33 @@ class CrawlStatus(Enum):
FAILED = "FAILED" FAILED = "FAILED"
# @dataclass
# class CrawlStats:
# task_id: str
# url: str
# status: CrawlStatus
# start_time: Optional[datetime] = None
# end_time: Optional[datetime] = None
# memory_usage: float = 0.0
# peak_memory: float = 0.0
# error_message: str = ""
# @property
# def duration(self) -> str:
# if not self.start_time:
# return "0:00"
# end = self.end_time or datetime.now()
# duration = end - self.start_time
# return str(timedelta(seconds=int(duration.total_seconds())))
@dataclass @dataclass
class CrawlStats: class CrawlStats:
task_id: str task_id: str
url: str url: str
status: CrawlStatus status: CrawlStatus
start_time: Optional[datetime] = None start_time: Optional[Union[datetime, float]] = None
end_time: Optional[datetime] = None end_time: Optional[Union[datetime, float]] = None
memory_usage: float = 0.0 memory_usage: float = 0.0
peak_memory: float = 0.0 peak_memory: float = 0.0
error_message: str = "" error_message: str = ""
@@ -52,11 +72,21 @@ class CrawlStats:
def duration(self) -> str: def duration(self) -> str:
if not self.start_time: if not self.start_time:
return "0:00" return "0:00"
# Convert start_time to datetime if it's a float
start = self.start_time
if isinstance(start, float):
start = datetime.fromtimestamp(start)
# Get end time or use current time
end = self.end_time or datetime.now() end = self.end_time or datetime.now()
duration = end - self.start_time # Convert end_time to datetime if it's a float
if isinstance(end, float):
end = datetime.fromtimestamp(end)
duration = end - start
return str(timedelta(seconds=int(duration.total_seconds()))) return str(timedelta(seconds=int(duration.total_seconds())))
class DisplayMode(Enum): class DisplayMode(Enum):
DETAILED = "DETAILED" DETAILED = "DETAILED"
AGGREGATED = "AGGREGATED" AGGREGATED = "AGGREGATED"