feat(v0.3.6): Add screenshot capture, delayed content, and custom timeouts

- Implement screenshot capture functionality
- Add delayed content retrieval method
- Introduce custom page timeout parameter
- Enhance LLM support with multiple providers
- Improve database schema auto-updates
- Optimize image processing in WebScrappingStrategy
- Update error handling and logging
- Expand examples in quickstart_async.py
This commit is contained in:
unclecode
2024-10-12 13:42:42 +08:00
parent b99d20b725
commit ff3524d9b1
8 changed files with 127 additions and 22 deletions

View File

@@ -170,10 +170,12 @@ class WebScrappingStrategy(ContentScrappingStrategy):
if isinstance(element, Comment):
element.extract()
return False
# if element.name == 'img':
# process_image(element, url, 0, 1)
# return True
if element.name in ['script', 'style', 'link', 'meta', 'noscript']:
if element.name == 'img':
process_image(element, url, 0, 1)
element.decompose()
return False