From 3a234ec950d7acf3e3a0e0a374f4532bafab2b4b Mon Sep 17 00:00:00 2001 From: UncleCode Date: Wed, 5 Mar 2025 17:14:42 +0800 Subject: [PATCH] fix(auth): make JWT authentication optional with fallback Modify authentication system to gracefully handle cases where JWT is not enabled or the token is missing. This includes: - Making HTTPBearer auto_error=False to prevent automatic 403 errors - Updating token dependency to return None when JWT is disabled - Fixing model deserialization in CrawlResult - Updating documentation links - Cleaning up imports BREAKING CHANGE: Authentication behavior changed to be more permissive when JWT is disabled or no token is supplied --- crawl4ai/async_database.py | 2 +- crawl4ai/markdown_generation_strategy.py | 4 ++-- crawl4ai/models.py | 6 +++++- deploy/docker/README.md | 4 ++-- deploy/docker/auth.py | 15 ++++++++++++--- deploy/docker/server.py | 2 +- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/crawl4ai/async_database.py b/crawl4ai/async_database.py index 02b377e6..e9a9daf1 100644 --- a/crawl4ai/async_database.py +++ b/crawl4ai/async_database.py @@ -4,10 +4,10 @@ import aiosqlite import asyncio from typing import Optional, Dict from contextlib import asynccontextmanager -import logging import json # Added for serialization/deserialization from .utils import ensure_content_dirs, generate_content_hash from .models import CrawlResult, MarkdownGenerationResult, StringCompatibleMarkdown +# , StringCompatibleMarkdown import aiofiles from .utils import VersionManager from .async_logger import AsyncLogger diff --git a/crawl4ai/markdown_generation_strategy.py b/crawl4ai/markdown_generation_strategy.py index 56b5a402..e89239f3 100644 --- a/crawl4ai/markdown_generation_strategy.py +++ b/crawl4ai/markdown_generation_strategy.py @@ -2,8 +2,8 @@ from abc import ABC, abstractmethod from typing import Optional, Dict, Any, Tuple from .models import MarkdownGenerationResult from .html2text import CustomHTML2Text -from .types import RelevantContentFilter -# from 
.content_filter_strategy import RelevantContentFilter +# from .types import RelevantContentFilter +from .content_filter_strategy import RelevantContentFilter import re from urllib.parse import urljoin diff --git a/crawl4ai/models.py b/crawl4ai/models.py index 4c96adf0..ef9efc06 100644 --- a/crawl4ai/models.py +++ b/crawl4ai/models.py @@ -149,7 +149,11 @@ class CrawlResult(BaseModel): markdown_result = data.pop('markdown', None) super().__init__(**data) if markdown_result is not None: - self._markdown = markdown_result + self._markdown = ( + MarkdownGenerationResult(**markdown_result) + if isinstance(markdown_result, dict) + else markdown_result + ) @property def markdown(self): diff --git a/deploy/docker/README.md b/deploy/docker/README.md index fbed6576..fdcb9744 100644 --- a/deploy/docker/README.md +++ b/deploy/docker/README.md @@ -595,8 +595,8 @@ curl http://localhost:8000/health ## Complete Examples Check out the `examples` folder in our repository for full working examples! Here are two to get you started: -[Using Client SDK](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_sdk_example.py) -[Using REST API](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_rest_api_example.py) +[Using Client SDK](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_sdk.py) +[Using REST API](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_rest_api.py) ## Server Configuration diff --git a/deploy/docker/auth.py b/deploy/docker/auth.py index 8851bd36..f9e75d78 100644 --- a/deploy/docker/auth.py +++ b/deploy/docker/auth.py @@ -10,7 +10,7 @@ from pydantic.main import BaseModel import base64 instance = JWT() -security = HTTPBearer() +security = HTTPBearer(auto_error=False) SECRET_KEY = os.environ.get("SECRET_KEY", "mysecret") ACCESS_TOKEN_EXPIRE_MINUTES = 60 @@ -30,6 +30,9 @@ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) - def 
verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> Dict: """Verify the JWT token from the Authorization header.""" + + if credentials is None: + return None token = credentials.credentials verifying_key = get_jwk_from_secret(SECRET_KEY) try: @@ -38,9 +41,15 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) except Exception: raise HTTPException(status_code=401, detail="Invalid or expired token") + def get_token_dependency(config: Dict): - """Return the token dependency if JWT is enabled, else None.""" - return verify_token if config.get("security", {}).get("jwt_enabled", False) else None + """Return the token dependency if JWT is enabled, else a function that returns None.""" + + if config.get("security", {}).get("jwt_enabled", False): + return verify_token + else: + return lambda: None + class TokenRequest(BaseModel): email: EmailStr \ No newline at end of file diff --git a/deploy/docker/server.py b/deploy/docker/server.py index a5994989..edb55130 100644 --- a/deploy/docker/server.py +++ b/deploy/docker/server.py @@ -92,7 +92,7 @@ async def get_markdown( f: FilterType = FilterType.FIT, q: Optional[str] = None, c: Optional[str] = "0", - # token_data: Optional[Dict] = Depends(token_dependency) + token_data: Optional[Dict] = Depends(token_dependency) ): result = await handle_markdown_request(url, f, q, c, config) return PlainTextResponse(result)