From d7b37e849d663bfc570bd9fec316f8bd262b0d0a Mon Sep 17 00:00:00 2001 From: unclecode Date: Fri, 17 May 2024 16:50:38 +0800 Subject: [PATCH] chore: Update CrawlRequest model to use NoExtractionStrategy as default --- main.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index 5fc01a75..45e37515 100644 --- a/main.py +++ b/main.py @@ -44,14 +44,12 @@ def get_crawler(): return WebCrawler() class CrawlRequest(BaseModel): - urls: List[HttpUrl] - provider_model: str - api_token: str + urls: List[str] include_raw_html: Optional[bool] = False bypass_cache: bool = False extract_blocks: bool = True word_count_threshold: Optional[int] = 5 - extraction_strategy: Optional[str] = "CosineStrategy" + extraction_strategy: Optional[str] = "NoExtractionStrategy" extraction_strategy_args: Optional[dict] = {} chunking_strategy: Optional[str] = "RegexChunking" chunking_strategy_args: Optional[dict] = {} @@ -95,9 +93,6 @@ def import_strategy(module_name: str, class_name: str, *args, **kwargs): @app.post("/crawl") async def crawl_urls(crawl_request: CrawlRequest, request: Request): global current_requests - # Raise error if api_token is not provided - if not crawl_request.api_token: - raise HTTPException(status_code=401, detail="API token is required.") async with lock: if current_requests >= MAX_CONCURRENT_REQUESTS: raise HTTPException(status_code=429, detail="Too many requests - please try again later.")