From 0bba0e074f720a5d03027ee8fdf699f46ce8af82 Mon Sep 17 00:00:00 2001 From: bizrockman Date: Mon, 4 Nov 2024 20:12:24 +0100 Subject: [PATCH] Prevent "'NoneType' object has no attribute 'get'" errors Sometimes the list contains Tag elements that do not have attrs set, which results in this error. --- crawl4ai/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index baa08a0f..869c22d5 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -706,9 +706,12 @@ def get_content_of_website_optimized(url: str, html: str, word_count_threshold: body = flatten_nested_elements(body) base64_pattern = re.compile(r'data:image/[^;]+;base64,([^"]+)') for img in imgs: - src = img.get('src', '') - if base64_pattern.match(src): - img['src'] = base64_pattern.sub('', src) + try: + src = img.get('src', '') + if base64_pattern.match(src): + img['src'] = base64_pattern.sub('', src) + except: + pass cleaned_html = str(body).replace('\n\n', '\n').replace(' ', ' ') cleaned_html = sanitize_html(cleaned_html)