Apply Ruff Corrections

This commit is contained in:
UncleCode
2025-01-13 19:19:58 +08:00
parent c3370ec5da
commit 8ec12d7d68
84 changed files with 6861 additions and 5076 deletions

View File

@@ -1,18 +1,12 @@
from pydantic import BaseModel, HttpUrl
from typing import List, Dict, Optional, Callable, Awaitable, Union, Tuple, Any
from typing import List, Dict, Optional, Callable, Awaitable, Union, Any
from enum import Enum
from dataclasses import dataclass, field
from .ssl_certificate import SSLCertificate
from dataclasses import dataclass
from .ssl_certificate import SSLCertificate
from datetime import datetime
from enum import Enum
from typing import Optional
from datetime import timedelta
###############################
# Dispatcher Models
###############################
@@ -22,6 +16,7 @@ class DomainState:
current_delay: float = 0
fail_count: int = 0
@dataclass
class CrawlerTaskResult:
task_id: str
@@ -33,12 +28,14 @@ class CrawlerTaskResult:
end_time: datetime
error_message: str = ""
class CrawlStatus(Enum):
QUEUED = "QUEUED"
IN_PROGRESS = "IN_PROGRESS"
COMPLETED = "COMPLETED"
FAILED = "FAILED"
@dataclass
class CrawlStats:
task_id: str
@@ -49,7 +46,7 @@ class CrawlStats:
memory_usage: float = 0.0
peak_memory: float = 0.0
error_message: str = ""
@property
def duration(self) -> str:
if not self.start_time:
@@ -58,26 +55,29 @@ class CrawlStats:
duration = end - self.start_time
return str(timedelta(seconds=int(duration.total_seconds())))
class DisplayMode(Enum):
DETAILED = "DETAILED"
AGGREGATED = "AGGREGATED"
###############################
# Crawler Models
###############################
@dataclass
class TokenUsage:
completion_tokens: int = 0
prompt_tokens: int = 0
prompt_tokens: int = 0
total_tokens: int = 0
completion_tokens_details: Optional[dict] = None
prompt_tokens_details: Optional[dict] = None
class UrlModel(BaseModel):
url: HttpUrl
forced: bool = False
class MarkdownGenerationResult(BaseModel):
raw_markdown: str
markdown_with_citations: str
@@ -85,6 +85,7 @@ class MarkdownGenerationResult(BaseModel):
fit_markdown: Optional[str] = None
fit_html: Optional[str] = None
class DispatchResult(BaseModel):
task_id: str
memory_usage: float
@@ -92,6 +93,8 @@ class DispatchResult(BaseModel):
start_time: datetime
end_time: datetime
error_message: str = ""
class CrawlResult(BaseModel):
url: str
html: str
@@ -101,7 +104,7 @@ class CrawlResult(BaseModel):
links: Dict[str, List[Dict]] = {}
downloaded_files: Optional[List[str]] = None
screenshot: Optional[str] = None
pdf : Optional[bytes] = None
pdf: Optional[bytes] = None
markdown: Optional[Union[str, MarkdownGenerationResult]] = None
markdown_v2: Optional[MarkdownGenerationResult] = None
fit_markdown: Optional[str] = None
@@ -114,9 +117,11 @@ class CrawlResult(BaseModel):
status_code: Optional[int] = None
ssl_certificate: Optional[SSLCertificate] = None
dispatch_result: Optional[DispatchResult] = None
class Config:
arbitrary_types_allowed = True
class AsyncCrawlResponse(BaseModel):
html: str
response_headers: Dict[str, str]
@@ -130,6 +135,7 @@ class AsyncCrawlResponse(BaseModel):
class Config:
arbitrary_types_allowed = True
###############################
# Scraping Models
###############################
@@ -143,21 +149,29 @@ class MediaItem(BaseModel):
format: Optional[str] = None
width: Optional[int] = None
class Link(BaseModel):
href: str
text: str
title: Optional[str] = None
base_domain: str
class Media(BaseModel):
images: List[MediaItem] = []
videos: List[MediaItem] = [] # Using MediaItem model for now, can be extended with Video model if needed
audios: List[MediaItem] = [] # Using MediaItem model for now, can be extended with Audio model if needed
videos: List[
MediaItem
] = [] # Using MediaItem model for now, can be extended with Video model if needed
audios: List[
MediaItem
] = [] # Using MediaItem model for now, can be extended with Audio model if needed
class Links(BaseModel):
internal: List[Link] = []
external: List[Link] = []
class ScrapingResult(BaseModel):
cleaned_html: str
success: bool