Files
crawl4ai/deploy/docker/models.py
UncleCode 53ac3ec0b4 feat(docker): add Docker service integration and config serialization
Add Docker service integration with FastAPI server and client implementation.
Implement serialization utilities for BrowserConfig and CrawlerRunConfig to support
Docker service communication. Clean up imports and improve error handling.

- Add Crawl4aiDockerClient class
- Implement config serialization/deserialization
- Add FastAPI server with streaming support
- Add health check endpoint
- Clean up imports and type hints
2025-01-31 18:00:16 +08:00

79 lines
2.6 KiB
Python

from typing import List, Optional, Any, Dict
from pydantic import BaseModel
from crawl4ai import (
BrowserConfig,
CrawlerRunConfig,
DefaultMarkdownGenerator,
PruningContentFilter,
BM25ContentFilter,
LLMContentFilter,
# Add other strategy classes as needed
)
class StrategyConfig(BaseModel):
"""Base class for strategy configurations"""
type: str
params: Dict[str, Any]
def create_instance(self):
"""Convert config to actual strategy instance"""
strategy_mappings = {
# Markdown Generators
'DefaultMarkdownGenerator': DefaultMarkdownGenerator,
# Content Filters
'PruningContentFilter': PruningContentFilter,
'BM25ContentFilter': BM25ContentFilter,
'LLMContentFilter': LLMContentFilter,
# Add other mappings as needed
# 'CustomStrategy': CustomStrategyClass,
}
strategy_class = strategy_mappings.get(self.type)
if not strategy_class:
raise ValueError(f"Unknown strategy type: {self.type}")
# Handle nested strategy configurations
processed_params = {}
for key, value in self.params.items():
if isinstance(value, dict) and 'type' in value:
# Recursively create nested strategy instances
nested_config = StrategyConfig(type=value['type'], params=value.get('params', {}))
processed_params[key] = nested_config.create_instance()
else:
processed_params[key] = value
return strategy_class(**processed_params)
class CrawlRequest(BaseModel):
urls: List[str]
browser_config: Optional[dict] = None
crawler_config: Optional[dict] = None
def get_configs(self):
"""Enhanced conversion of dicts to config objects"""
browser_config = BrowserConfig.from_kwargs(self.browser_config or {})
crawler_dict = self.crawler_config or {}
# Process strategy configurations
for key, value in crawler_dict.items():
if isinstance(value, dict) and 'type' in value:
# Convert strategy configuration to actual instance
strategy_config = StrategyConfig(
type=value['type'],
params=value.get('params', {})
)
crawler_dict[key] = strategy_config.create_instance()
crawler_config = CrawlerRunConfig.from_kwargs(crawler_dict)
return browser_config, crawler_config
class CrawlResponse(BaseModel):
success: bool
results: List[dict] # Will contain serialized CrawlResults
class Config:
arbitrary_types_allowed = True