From 40477493d328399e747b9e9248430a30b1ea0b31 Mon Sep 17 00:00:00 2001 From: unclecode Date: Wed, 31 Jul 2024 16:15:55 +0800 Subject: [PATCH] refactor: Remove image format dot in get_content_of_website_optimized This change strips the dot from the file extension that `os.path.splitext` returns in the `get_content_of_website_optimized` function. As a result, `image_format` consistently holds the bare extension (for example `jpg` instead of `.jpg`). --- crawl4ai/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index 679c6d1d..e0d5d1ea 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -498,6 +498,8 @@ def get_content_of_website_optimized(url: str, html: str, word_count_threshold: width_value, width_unit = parse_dimension(image_width) image_size = 0 #int(fetch_image_file_size(img,base_url) or 0) image_format = os.path.splitext(img.get('src',''))[1].lower() + # Remove . from format + image_format = image_format.strip('.') score = 0 if height_value: if height_unit == 'px' and height_value > 150: