Add link analysis tests and integration tests for /links/analyze endpoint
- Implemented `test_link_analysis` in `test_docker.py` to validate link analysis functionality. - Created `test_link_analysis.py` with comprehensive tests for link analysis, including basic functionality, configuration options, error handling, performance, and edge cases. - Added integration tests in `test_link_analysis_integration.py` to verify the /links/analyze endpoint, including health checks, authentication, and error handling.
This commit is contained in:
@@ -7,9 +7,37 @@ Crawl4AI FastAPI entry‑point
|
||||
"""
|
||||
|
||||
# ── stdlib & 3rd‑party imports ───────────────────────────────
|
||||
from crawler_pool import get_crawler, close_all, janitor
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LinkPreviewConfig
|
||||
from auth import create_access_token, get_token_dependency, TokenRequest
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from fastapi import Request, Depends
|
||||
from fastapi.responses import FileResponse
|
||||
import ast
|
||||
import asyncio
|
||||
import base64
|
||||
import re
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LinkPreviewConfig
|
||||
from api import (
|
||||
handle_markdown_request, handle_llm_qa,
|
||||
handle_stream_crawl_request, handle_crawl_request,
|
||||
stream_results
|
||||
)
|
||||
from schemas import (
|
||||
CrawlRequestWithHooks,
|
||||
MarkdownRequest,
|
||||
RawCode,
|
||||
HTMLRequest,
|
||||
ScreenshotRequest,
|
||||
PDFRequest,
|
||||
JSEndpointRequest,
|
||||
LinkAnalysisRequest,
|
||||
)
|
||||
|
||||
from utils import (
|
||||
FilterType, load_config, setup_logging, verify_email_domain
|
||||
)
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
@@ -1045,6 +1073,57 @@ async def execute_js(
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/links/analyze")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
@mcp_tool("links_analyze")
|
||||
async def analyze_links(
|
||||
request: Request,
|
||||
body: LinkAnalysisRequest,
|
||||
_td: Dict = Depends(token_dep),
|
||||
):
|
||||
"""
|
||||
Analyze and score links on a webpage.
|
||||
Returns a dictionary of links with their scores and metadata.
|
||||
"""
|
||||
try:
|
||||
# Create AsyncWebCrawler instance
|
||||
async with AsyncWebCrawler(config=BrowserConfig()) as crawler:
|
||||
# Deserialize config dict to LinkPreviewConfig, use default if not provided
|
||||
link_preview_config = LinkPreviewConfig.from_dict(body.config) if body.config else LinkPreviewConfig()
|
||||
|
||||
# Create CrawlerRunConfig with link analysis settings
|
||||
run_config = CrawlerRunConfig(
|
||||
link_preview_config=link_preview_config,
|
||||
score_links=True,
|
||||
screenshot=False,
|
||||
pdf=False,
|
||||
extraction_strategy=None
|
||||
)
|
||||
|
||||
# Execute the crawl
|
||||
result = await crawler.arun(url=body.url, config=run_config)
|
||||
|
||||
# Check if crawl was successful
|
||||
if not result.success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=result.error_message or "Crawl failed"
|
||||
)
|
||||
|
||||
# Extract and return the links dictionary
|
||||
return JSONResponse(result.links)
|
||||
|
||||
except HTTPException:
|
||||
# Re-raise HTTP exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# Handle any other exceptions
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Internal server error: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@app.get("/llm/{url:path}",
|
||||
summary="LLM Q&A",
|
||||
description="Ask questions about a webpage using LLM.",
|
||||
|
||||
Reference in New Issue
Block a user