feat(docker): add Docker service integration and config serialization

Add Docker service integration with FastAPI server and client implementation.
Implement serialization utilities for BrowserConfig and CrawlerRunConfig to support
Docker service communication. Clean up imports and improve error handling.

- Add Crawl4aiDockerClient class
- Implement config serialization/deserialization
- Add FastAPI server with streaming support
- Add health check endpoint
- Clean up imports and type hints
This commit is contained in:
UncleCode
2025-01-31 18:00:16 +08:00
parent ce4f04dad2
commit 53ac3ec0b4
11 changed files with 859 additions and 97 deletions

36
deploy/docker/utils.py Normal file
View File

@@ -0,0 +1,36 @@
import json
from datetime import datetime
class CrawlJSONEncoder(json.JSONEncoder):
"""Custom JSON encoder for crawler results"""
def default(self, obj):
if isinstance(obj, datetime):
return obj.isoformat()
if isinstance(obj, bytes):
return obj.decode('utf-8', errors='ignore')
if hasattr(obj, 'model_dump'):
return obj.model_dump()
if hasattr(obj, '__dict__'):
return {k: v for k, v in obj.__dict__.items() if not k.startswith('_')}
return str(obj) # Fallback to string representation
def serialize_result(result) -> dict:
"""Safely serialize a crawler result"""
try:
# Convert to dict handling special cases
if hasattr(result, 'model_dump'):
result_dict = result.model_dump()
else:
result_dict = {
k: v for k, v in result.__dict__.items()
if not k.startswith('_')
}
# Remove known non-serializable objects
result_dict.pop('ssl_certificate', None)
result_dict.pop('downloaded_files', None)
return result_dict
except Exception as e:
print(f"Error serializing result: {e}")
return {"error": str(e), "url": getattr(result, 'url', 'unknown')}