fix(auth): make JWT authentication optional with fallback

Modify authentication system to gracefully handle cases where JWT is not enabled or token is missing. This includes:
- Making HTTPBearer auto_error=False to prevent automatic 403 errors
- Updating token dependency to return None when JWT is disabled
- Fixing model deserialization in CrawlResult
- Updating documentation links
- Cleaning up imports

BREAKING CHANGE: Authentication behavior changed to be more permissive when JWT is disabled
This commit is contained in:
UncleCode
2025-03-05 17:14:42 +08:00
parent 9e89d27fcd
commit 3a234ec950
6 changed files with 23 additions and 10 deletions

View File

@@ -4,10 +4,10 @@ import aiosqlite
import asyncio
from typing import Optional, Dict
from contextlib import asynccontextmanager
import logging
import json # Added for serialization/deserialization
from .utils import ensure_content_dirs, generate_content_hash
from .models import CrawlResult, MarkdownGenerationResult, StringCompatibleMarkdown
# , StringCompatibleMarkdown
import aiofiles
from .utils import VersionManager
from .async_logger import AsyncLogger

View File

@@ -2,8 +2,8 @@ from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, Tuple
from .models import MarkdownGenerationResult
from .html2text import CustomHTML2Text
from .types import RelevantContentFilter
# from .content_filter_strategy import RelevantContentFilter
# from .types import RelevantContentFilter
from .content_filter_strategy import RelevantContentFilter
import re
from urllib.parse import urljoin

View File

@@ -149,7 +149,11 @@ class CrawlResult(BaseModel):
markdown_result = data.pop('markdown', None)
super().__init__(**data)
if markdown_result is not None:
self._markdown = markdown_result
self._markdown = (
MarkdownGenerationResult(**markdown_result)
if isinstance(markdown_result, dict)
else markdown_result
)
@property
def markdown(self):

View File

@@ -595,8 +595,8 @@ curl http://localhost:8000/health
## Complete Examples
Check out the `examples` folder in our repository for full working examples! Here are two to get you started:
[Using Client SDK](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_sdk_example.py)
[Using REST API](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_rest_api_example.py)
[Using Client SDK](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_sdk.py)
[Using REST API](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_rest_api.py)
## Server Configuration

View File

@@ -10,7 +10,7 @@ from pydantic.main import BaseModel
import base64
instance = JWT()
security = HTTPBearer()
security = HTTPBearer(auto_error=False)
SECRET_KEY = os.environ.get("SECRET_KEY", "mysecret")
ACCESS_TOKEN_EXPIRE_MINUTES = 60
@@ -30,6 +30,9 @@ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> Dict:
"""Verify the JWT token from the Authorization header."""
if credentials is None:
return None
token = credentials.credentials
verifying_key = get_jwk_from_secret(SECRET_KEY)
try:
@@ -38,9 +41,15 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security))
except Exception:
raise HTTPException(status_code=401, detail="Invalid or expired token")
def get_token_dependency(config: Dict):
"""Return the token dependency if JWT is enabled, else None."""
return verify_token if config.get("security", {}).get("jwt_enabled", False) else None
"""Return the token dependency if JWT is enabled, else a function that returns None."""
if config.get("security", {}).get("jwt_enabled", False):
return verify_token
else:
return lambda: None
class TokenRequest(BaseModel):
email: EmailStr

View File

@@ -92,7 +92,7 @@ async def get_markdown(
f: FilterType = FilterType.FIT,
q: Optional[str] = None,
c: Optional[str] = "0",
# token_data: Optional[Dict] = Depends(token_dependency)
token_data: Optional[Dict] = Depends(token_dependency)
):
result = await handle_markdown_request(url, f, q, c, config)
return PlainTextResponse(result)