From 9f5eef1f3890094a4df707458fa611a83398429d Mon Sep 17 00:00:00 2001 From: UncleCode Date: Wed, 6 Nov 2024 21:50:09 +0800 Subject: [PATCH] Refactored the `CustomHTML2Text` class in `content_scrapping_strategy.py` by commenting out the no-op branch for header tags (h1-h6), so these tags now fall through to `super().handle_tag`. This cleanup improves code readability and reduces maintenance overhead. --- crawl4ai/content_scrapping_strategy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crawl4ai/content_scrapping_strategy.py b/crawl4ai/content_scrapping_strategy.py index 66b3ad91..caed7319 100644 --- a/crawl4ai/content_scrapping_strategy.py +++ b/crawl4ai/content_scrapping_strategy.py @@ -93,8 +93,8 @@ class CustomHTML2Text(HTML2Text): else: self.o('\n```') self.inside_pre = False - elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: - pass + # elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: + # pass else: super().handle_tag(tag, attrs, start)