feat(docker): add Docker deployment configuration and API server

Add Docker deployment setup with FastAPI server implementation for Crawl4AI:
- Create Dockerfile with Python 3.10 and Playwright dependencies
- Implement FastAPI server with streaming and non-streaming endpoints
- Add request/response models and JSON serialization
- Include test script for API verification

Also includes:
- Update .gitignore for Continue development files
- Add project rules in .continuerules
- Clean up async_dispatcher.py formatting
This commit is contained in:
UncleCode
2025-01-31 15:22:21 +08:00
parent f81712eb91
commit ce4f04dad2
9 changed files with 426 additions and 638 deletions

78
deploy/docker/models.py Normal file
View File

@@ -0,0 +1,78 @@
from typing import List, Optional, Any, Dict
from pydantic import BaseModel
from crawl4ai import (
BrowserConfig,
CrawlerRunConfig,
DefaultMarkdownGenerator,
PruningContentFilter,
BM25ContentFilter,
LLMContentFilter,
# Add other strategy classes as needed
)
class StrategyConfig(BaseModel):
"""Base class for strategy configurations"""
type: str
params: Dict[str, Any]
def create_instance(self):
"""Convert config to actual strategy instance"""
strategy_mappings = {
# Markdown Generators
'DefaultMarkdownGenerator': DefaultMarkdownGenerator,
# Content Filters
'PruningContentFilter': PruningContentFilter,
'BM25ContentFilter': BM25ContentFilter,
'LLMContentFilter': LLMContentFilter,
# Add other mappings as needed
# 'CustomStrategy': CustomStrategyClass,
}
strategy_class = strategy_mappings.get(self.type)
if not strategy_class:
raise ValueError(f"Unknown strategy type: {self.type}")
# Handle nested strategy configurations
processed_params = {}
for key, value in self.params.items():
if isinstance(value, dict) and 'type' in value:
# Recursively create nested strategy instances
nested_config = StrategyConfig(type=value['type'], params=value.get('params', {}))
processed_params[key] = nested_config.create_instance()
else:
processed_params[key] = value
return strategy_class(**processed_params)
class CrawlRequest(BaseModel):
urls: List[str]
browser_config: Optional[dict] = None
crawler_config: Optional[dict] = None
def get_configs(self):
"""Enhanced conversion of dicts to config objects"""
browser_config = BrowserConfig.from_kwargs(self.browser_config or {})
crawler_dict = self.crawler_config or {}
# Process strategy configurations
for key, value in crawler_dict.items():
if isinstance(value, dict) and 'type' in value:
# Convert strategy configuration to actual instance
strategy_config = StrategyConfig(
type=value['type'],
params=value.get('params', {})
)
crawler_dict[key] = strategy_config.create_instance()
crawler_config = CrawlerRunConfig.from_kwargs(crawler_dict)
return browser_config, crawler_config
class CrawlResponse(BaseModel):
success: bool
results: List[dict] # Will contain serialized CrawlResults
class Config:
arbitrary_types_allowed = True