chore: Update CrawlRequest model to use NoExtractionStrategy as default
This commit is contained in:
9
main.py
9
main.py
@@ -44,14 +44,12 @@ def get_crawler():
|
|||||||
return WebCrawler()
|
return WebCrawler()
|
||||||
|
|
||||||
class CrawlRequest(BaseModel):
|
class CrawlRequest(BaseModel):
|
||||||
urls: List[HttpUrl]
|
urls: List[str]
|
||||||
provider_model: str
|
|
||||||
api_token: str
|
|
||||||
include_raw_html: Optional[bool] = False
|
include_raw_html: Optional[bool] = False
|
||||||
bypass_cache: bool = False
|
bypass_cache: bool = False
|
||||||
extract_blocks: bool = True
|
extract_blocks: bool = True
|
||||||
word_count_threshold: Optional[int] = 5
|
word_count_threshold: Optional[int] = 5
|
||||||
extraction_strategy: Optional[str] = "CosineStrategy"
|
extraction_strategy: Optional[str] = "NoExtractionStrategy"
|
||||||
extraction_strategy_args: Optional[dict] = {}
|
extraction_strategy_args: Optional[dict] = {}
|
||||||
chunking_strategy: Optional[str] = "RegexChunking"
|
chunking_strategy: Optional[str] = "RegexChunking"
|
||||||
chunking_strategy_args: Optional[dict] = {}
|
chunking_strategy_args: Optional[dict] = {}
|
||||||
@@ -95,9 +93,6 @@ def import_strategy(module_name: str, class_name: str, *args, **kwargs):
|
|||||||
@app.post("/crawl")
|
@app.post("/crawl")
|
||||||
async def crawl_urls(crawl_request: CrawlRequest, request: Request):
|
async def crawl_urls(crawl_request: CrawlRequest, request: Request):
|
||||||
global current_requests
|
global current_requests
|
||||||
# Raise error if api_token is not provided
|
|
||||||
if not crawl_request.api_token:
|
|
||||||
raise HTTPException(status_code=401, detail="API token is required.")
|
|
||||||
async with lock:
|
async with lock:
|
||||||
if current_requests >= MAX_CONCURRENT_REQUESTS:
|
if current_requests >= MAX_CONCURRENT_REQUESTS:
|
||||||
raise HTTPException(status_code=429, detail="Too many requests - please try again later.")
|
raise HTTPException(status_code=429, detail="Too many requests - please try again later.")
|
||||||
|
|||||||
Reference in New Issue
Block a user