Merge pull request #229 from bizrockman/main
Prevent "'NoneType' object has no attribute 'get'" errors
This commit is contained in:
@@ -706,9 +706,12 @@ def get_content_of_website_optimized(url: str, html: str, word_count_threshold:
|
||||
body = flatten_nested_elements(body)
|
||||
base64_pattern = re.compile(r'data:image/[^;]+;base64,([^"]+)')
|
||||
for img in imgs:
|
||||
src = img.get('src', '')
|
||||
if base64_pattern.match(src):
|
||||
img['src'] = base64_pattern.sub('', src)
|
||||
try:
|
||||
src = img.get('src', '')
|
||||
if base64_pattern.match(src):
|
||||
img['src'] = base64_pattern.sub('', src)
|
||||
except:
|
||||
pass
|
||||
|
||||
cleaned_html = str(body).replace('\n\n', '\n').replace(' ', ' ')
|
||||
cleaned_html = sanitize_html(cleaned_html)
|
||||
|
||||
@@ -113,4 +113,4 @@ Here’s a clear and focused outline for the **Media Handling: Images, Videos, a
|
||||
|
||||
---
|
||||
|
||||
This outline provides users with a complete guide to handling images, videos, and audio in Crawl4AI, using metadata to enhance relevance and precision in multimedia extraction.
|
||||
This outline provides users with a complete guide to handling images, videos, and audio in Crawl4AI, using metadata to enhance relevance and precision in multimedia extraction.
|
||||
@@ -183,4 +183,4 @@ Here’s a detailed outline for the **JSON-CSS Extraction Strategy** video, cove
|
||||
|
||||
---
|
||||
|
||||
This outline covers each JSON-CSS Extraction option in Crawl4AI, with practical examples and schema configurations, making it a thorough guide for users.
|
||||
This outline covers each JSON-CSS Extraction option in Crawl4AI, with practical examples and schema configurations, making it a thorough guide for users.
|
||||
@@ -150,4 +150,4 @@ Here’s a comprehensive outline for the **LLM Extraction Strategy** video, cove
|
||||
|
||||
---
|
||||
|
||||
This outline explains LLM Extraction in Crawl4AI, with examples showing how to extract structured data using custom schemas and instructions. It demonstrates flexibility with multiple providers, ensuring practical application for different use cases.
|
||||
This outline explains LLM Extraction in Crawl4AI, with examples showing how to extract structured data using custom schemas and instructions. It demonstrates flexibility with multiple providers, ensuring practical application for different use cases.
|
||||
@@ -133,4 +133,4 @@ Here’s a structured outline for the **Cosine Similarity Strategy** video, cove
|
||||
|
||||
---
|
||||
|
||||
This outline covers Cosine Similarity Strategy’s speed and effectiveness, providing examples that showcase its potential for clustering various content types efficiently.
|
||||
This outline covers Cosine Similarity Strategy’s speed and effectiveness, providing examples that showcase its potential for clustering various content types efficiently.
|
||||
Reference in New Issue
Block a user