Refactor content_scrapping_strategy.py to remove elements matching `excluded_tags` from the body before CSS-selector filtering

2024-09-12 17:35:45 +08:00
parent b179aa9b6f
commit d628bc4034
1 changed file with 4 additions and 0 deletions
--- a/crawl4ai/content_scrapping_strategy.py
+++ b/crawl4ai/content_scrapping_strategy.py
@@ -43,6 +43,10 @@ class WebScrappingStrategy(ContentScrappingStrategy):
        image_description_min_word_threshold = kwargs.get('image_description_min_word_threshold', IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD)
        for tag in kwargs.get('excluded_tags', []) or []:
            for el in body.select(tag):
                el.decompose()
        if css_selector:
            selected_elements = body.select(css_selector)
            if not selected_elements: