fix(models): support float timestamps in CrawlStats
Modify the CrawlStats class to handle both datetime and float timestamp formats for the start_time and end_time fields. This change improves compatibility with different time formats while maintaining existing functionality.

Other minor changes:
- Add datetime import in async_dispatcher
- Update JsonElementExtractionStrategy kwargs handling

No breaking changes.
This commit is contained in:
@@ -13,7 +13,7 @@ from rich.live import Live
|
|||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
from rich import box
|
from rich import box
|
||||||
from datetime import timedelta
|
from datetime import timedelta, datetime
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
import time
|
import time
|
||||||
import psutil
|
import psutil
|
||||||
|
|||||||
@@ -1064,7 +1064,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
|
|||||||
api_token (str): Legacy Parameter. API token for LLM provider
|
api_token (str): Legacy Parameter. API token for LLM provider
|
||||||
llm_config (LLMConfig): LLM configuration object
|
llm_config (LLMConfig): LLM configuration object
|
||||||
prompt (str, optional): Custom prompt template to use
|
prompt (str, optional): Custom prompt template to use
|
||||||
**kwargs: Additional args passed to perform_completion_with_backoff
|
**kwargs: Additional args passed to LLM processor
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: Generated schema following the JsonElementExtractionStrategy format
|
dict: Generated schema following the JsonElementExtractionStrategy format
|
||||||
@@ -1130,7 +1130,8 @@ In this scenario, use your best judgment to generate the schema. Try to maximize
|
|||||||
prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]),
|
prompt_with_variables="\n\n".join([system_message["content"], user_message["content"]]),
|
||||||
json_response = True,
|
json_response = True,
|
||||||
api_token=llm_config.api_token,
|
api_token=llm_config.api_token,
|
||||||
**kwargs
|
base_url=llm_config.base_url,
|
||||||
|
extra_args=kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract and return schema
|
# Extract and return schema
|
||||||
|
|||||||
@@ -37,13 +37,33 @@ class CrawlStatus(Enum):
|
|||||||
FAILED = "FAILED"
|
FAILED = "FAILED"
|
||||||
|
|
||||||
|
|
||||||
|
# @dataclass
|
||||||
|
# class CrawlStats:
|
||||||
|
# task_id: str
|
||||||
|
# url: str
|
||||||
|
# status: CrawlStatus
|
||||||
|
# start_time: Optional[datetime] = None
|
||||||
|
# end_time: Optional[datetime] = None
|
||||||
|
# memory_usage: float = 0.0
|
||||||
|
# peak_memory: float = 0.0
|
||||||
|
# error_message: str = ""
|
||||||
|
|
||||||
|
# @property
|
||||||
|
# def duration(self) -> str:
|
||||||
|
# if not self.start_time:
|
||||||
|
# return "0:00"
|
||||||
|
# end = self.end_time or datetime.now()
|
||||||
|
# duration = end - self.start_time
|
||||||
|
# return str(timedelta(seconds=int(duration.total_seconds())))
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CrawlStats:
|
class CrawlStats:
|
||||||
task_id: str
|
task_id: str
|
||||||
url: str
|
url: str
|
||||||
status: CrawlStatus
|
status: CrawlStatus
|
||||||
start_time: Optional[datetime] = None
|
start_time: Optional[Union[datetime, float]] = None
|
||||||
end_time: Optional[datetime] = None
|
end_time: Optional[Union[datetime, float]] = None
|
||||||
memory_usage: float = 0.0
|
memory_usage: float = 0.0
|
||||||
peak_memory: float = 0.0
|
peak_memory: float = 0.0
|
||||||
error_message: str = ""
|
error_message: str = ""
|
||||||
@@ -52,10 +72,20 @@ class CrawlStats:
|
|||||||
def duration(self) -> str:
|
def duration(self) -> str:
|
||||||
if not self.start_time:
|
if not self.start_time:
|
||||||
return "0:00"
|
return "0:00"
|
||||||
end = self.end_time or datetime.now()
|
|
||||||
duration = end - self.start_time
|
|
||||||
return str(timedelta(seconds=int(duration.total_seconds())))
|
|
||||||
|
|
||||||
|
# Convert start_time to datetime if it's a float
|
||||||
|
start = self.start_time
|
||||||
|
if isinstance(start, float):
|
||||||
|
start = datetime.fromtimestamp(start)
|
||||||
|
|
||||||
|
# Get end time or use current time
|
||||||
|
end = self.end_time or datetime.now()
|
||||||
|
# Convert end_time to datetime if it's a float
|
||||||
|
if isinstance(end, float):
|
||||||
|
end = datetime.fromtimestamp(end)
|
||||||
|
|
||||||
|
duration = end - start
|
||||||
|
return str(timedelta(seconds=int(duration.total_seconds())))
|
||||||
|
|
||||||
class DisplayMode(Enum):
|
class DisplayMode(Enum):
|
||||||
DETAILED = "DETAILED"
|
DETAILED = "DETAILED"
|
||||||
|
|||||||
Reference in New Issue
Block a user