fix(models): support float timestamps in CrawlStats

Modify CrawlStats class to handle both datetime and float timestamp formats for start_time and end_time fields. This change improves compatibility with different time formats while maintaining existing functionality.

Other minor changes:
- Add datetime import in async_dispatcher
- Update JsonElementExtractionStrategy kwargs handling

No breaking changes.
This commit is contained in:
UncleCode
2025-03-06 20:30:57 +08:00
parent 2327db6fdc
commit 29f7915b79
3 changed files with 38 additions and 7 deletions

View File

@@ -13,7 +13,7 @@ from rich.live import Live
from rich.table import Table
from rich.console import Console
from rich import box
from datetime import timedelta
from datetime import timedelta, datetime
from collections.abc import AsyncGenerator
import time
import psutil

View File

@@ -1064,7 +1064,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
api_token (str): Legacy Parameter. API token for LLM provider
llm_config (LLMConfig): LLM configuration object
prompt (str, optional): Custom prompt template to use
**kwargs: Additional args passed to perform_completion_with_backoff
**kwargs: Additional args passed to LLM processor
Returns:
dict: Generated schema following the JsonElementExtractionStrategy format
@@ -1130,7 +1130,8 @@ In this scenario, use your best judgment to generate the schema. Try to maximize
prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]),
json_response = True,
api_token=llm_config.api_token,
**kwargs
base_url=llm_config.base_url,
extra_args=kwargs
)
# Extract and return schema

View File

@@ -37,13 +37,33 @@ class CrawlStatus(Enum):
FAILED = "FAILED"
# @dataclass
# class CrawlStats:
# task_id: str
# url: str
# status: CrawlStatus
# start_time: Optional[datetime] = None
# end_time: Optional[datetime] = None
# memory_usage: float = 0.0
# peak_memory: float = 0.0
# error_message: str = ""
# @property
# def duration(self) -> str:
# if not self.start_time:
# return "0:00"
# end = self.end_time or datetime.now()
# duration = end - self.start_time
# return str(timedelta(seconds=int(duration.total_seconds())))
@dataclass
class CrawlStats:
task_id: str
url: str
status: CrawlStatus
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
start_time: Optional[Union[datetime, float]] = None
end_time: Optional[Union[datetime, float]] = None
memory_usage: float = 0.0
peak_memory: float = 0.0
error_message: str = ""
@@ -52,11 +72,21 @@ class CrawlStats:
def duration(self) -> str:
if not self.start_time:
return "0:00"
# Convert start_time to datetime if it's a float
start = self.start_time
if isinstance(start, float):
start = datetime.fromtimestamp(start)
# Get end time or use current time
end = self.end_time or datetime.now()
duration = end - self.start_time
# Convert end_time to datetime if it's a float
if isinstance(end, float):
end = datetime.fromtimestamp(end)
duration = end - start
return str(timedelta(seconds=int(duration.total_seconds())))
class DisplayMode(Enum):
DETAILED = "DETAILED"
AGGREGATED = "AGGREGATED"