- Extract all media tags
- Take screenshot of the page
This commit is contained in:
unclecode
2024-06-07 15:23:13 +08:00
parent aead6de888
commit 0533aeb814
8 changed files with 90 additions and 22 deletions

View File

@@ -1,5 +1,5 @@
from pydantic import BaseModel, HttpUrl
from typing import List
from typing import List, Dict
class UrlModel(BaseModel):
url: HttpUrl
@@ -10,6 +10,7 @@ class CrawlResult(BaseModel):
html: str
success: bool
cleaned_html: str = None
media: Dict[str, List[Dict]] = {}
markdown: str = None
extracted_content: str = None
metadata: dict = None