feat(docker): implement supervisor and secure API endpoints

Add supervisor configuration for managing Redis and Gunicorn processes
Replace direct process management with supervisord
Add secure and token-free API server variants
Implement JWT authentication for protected endpoints
Update datetime handling in async dispatcher
Add email domain verification

BREAKING CHANGE: Server startup now uses supervisord instead of direct process management
This commit is contained in:
UncleCode
2025-02-17 20:31:20 +08:00
parent 8bb799068e
commit 2864015469
12 changed files with 790 additions and 79 deletions

View File

@@ -18,6 +18,7 @@ from fastapi.responses import PlainTextResponse
from fastapi.responses import JSONResponse
from fastapi.background import BackgroundTasks
from typing import Dict
from fastapi import Query, Path
import os
from utils import (
@@ -27,7 +28,8 @@ from utils import (
)
from api import (
handle_markdown_request,
handle_llm_request
handle_llm_request,
handle_llm_qa
)
# Load configuration and setup
@@ -100,28 +102,56 @@ async def get_markdown(
result = await handle_markdown_request(url, f, q, c, config)
return PlainTextResponse(result)
@app.get("/llm/{url:path}", description="URL should be without http/https prefix")
async def llm_endpoint(
    request: Request,
    background_tasks: BackgroundTasks,
    url: str = Path(..., description="Domain and path without protocol"),
    q: Optional[str] = Query(None, description="Question to ask about the page content"),
):
    """QA endpoint that uses LLM with crawled content as context.

    Crawls the given URL, then answers question ``q`` about the page content.

    Args:
        request: Incoming request (kept for parity with other endpoints /
            rate-limiter hooks).
        background_tasks: FastAPI background-task handle (currently unused
            by the QA path; retained for interface compatibility).
        url: Target page, given without a scheme; ``https://`` is prepended
            when no scheme is present.
        q: The question to answer. Required — a 400 is raised when missing.

    Returns:
        JSONResponse of the form ``{"answer": <str>}``.

    Raises:
        HTTPException: 400 when ``q`` is absent; 500 when crawling or the
            LLM call fails (detail carries the underlying error message).
    """
    if not q:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Query parameter 'q' is required"
        )
    # Path-style URLs arrive without a scheme; default to HTTPS so the
    # crawler receives a fully-qualified URL.
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    try:
        answer = await handle_llm_qa(url, q, config)
        return JSONResponse({"answer": answer})
    except Exception as e:
        # Surface the failure reason to the client; the broad catch is the
        # top-level boundary for this endpoint.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
# Legacy extraction endpoint, superseded by the QA endpoint above.
# @app.get("/llm/{input:path}")
# @limiter.limit(config["rate_limiting"]["default_limit"])
# async def llm_endpoint(
#     request: Request,
#     background_tasks: BackgroundTasks,
#     input: str,
#     q: Optional[str] = None,
#     s: Optional[str] = None,
#     c: Optional[str] = "0"
# ):
#     """Handle LLM extraction requests."""
#     return await handle_llm_request(
#         redis, background_tasks, request, input, q, s, c, config
#     )
@app.get("/schema")
async def get_schema():
    """Endpoint for client-side validation schema.

    Returns:
        dict: Serialized default configurations for the browser and crawler
        (``BrowserConfig``/``CrawlerRunConfig`` ``.dump()`` output), which
        clients use to validate their own config payloads.
    """
    # Imported lazily so the server can start even when crawl4ai import
    # is slow or deferred.
    from crawl4ai import BrowserConfig, CrawlerRunConfig
    return {
        "browser": BrowserConfig().dump(),
        "crawler": CrawlerRunConfig().dump()
    }
@app.get(config["observability"]["health_check"]["endpoint"])