[v0.3.72] Enhance content extraction and proxy support

- Add ContentCleaningStrategy for improved content extraction
- Implement advanced proxy configuration with authentication
- Enhance image source detection and handling
- Add fit_markdown and fit_html for refined content output
- Improve external link and image handling flexibility
2024-10-22 20:19:22 +08:00
parent 04d16e6d2b
commit 60ba131ac8
6 changed files with 260 additions and 3 deletions
--- a/crawl4ai/models.py
+++ b/crawl4ai/models.py
@@ -14,6 +14,8 @@ class CrawlResult(BaseModel):
    links: Dict[str, List[Dict]] = {}
    screenshot: Optional[str] = None
    markdown: Optional[str] = None
+    fit_markdown: Optional[str] = None
+    fit_html: Optional[str] = None
    extracted_content: Optional[str] = None
    metadata: Optional[dict] = None
    error_message: Optional[str] = None