feat(models): add dedicated tables field to CrawlResult

- Add tables field to CrawlResult model while maintaining backward compatibility
- Update async_webcrawler.py to extract tables from media and pass to tables field
- Update crypto_analysis_example.py to use the new tables field
- Add /config/dump examples to demo_docker_api.py
- Bump version to 0.6.1
This commit is contained in:
UncleCode
2025-04-24 18:36:25 +08:00
parent ad4dfb21e1
commit ccec40ed17
7 changed files with 287 additions and 26 deletions

View File

@@ -1,3 +1,3 @@
# crawl4ai/_version.py
__version__ = "0.6.0"
__version__ = "0.6.1"

View File

@@ -496,11 +496,13 @@ class AsyncWebCrawler:
cleaned_html = sanitize_input_encode(
result.get("cleaned_html", ""))
media = result.get("media", {})
tables = media.pop("tables", []) if isinstance(media, dict) else []
links = result.get("links", {})
metadata = result.get("metadata", {})
else:
cleaned_html = sanitize_input_encode(result.cleaned_html)
media = result.media.model_dump()
tables = media.pop("tables", [])
links = result.links.model_dump()
metadata = result.metadata
@@ -627,6 +629,7 @@ class AsyncWebCrawler:
cleaned_html=cleaned_html,
markdown=markdown_result,
media=media,
tables=tables, # NEW
links=links,
metadata=metadata,
screenshot=screenshot_data,

View File

@@ -1,4 +1,4 @@
from pydantic import BaseModel, HttpUrl, PrivateAttr
from pydantic import BaseModel, HttpUrl, PrivateAttr, Field
from typing import List, Dict, Optional, Callable, Awaitable, Union, Any
from typing import AsyncGenerator
from typing import Generic, TypeVar
@@ -150,6 +150,7 @@ class CrawlResult(BaseModel):
redirected_url: Optional[str] = None
network_requests: Optional[List[Dict[str, Any]]] = None
console_messages: Optional[List[Dict[str, Any]]] = None
tables: List[Dict] = Field(default_factory=list) # NEW [{headers,rows,caption,summary}]
class Config:
arbitrary_types_allowed = True