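Refactored the CustomHTML2Text class in content_scrapping_strategy.py by commenting out the no-op branch for header tags (h1-h6); the branch is disabled rather than deleted. With the `elif ...: pass` branch commented out, header tags now fall through to the final `else` branch, which calls `super().handle_tag(tag, attrs, start)`. This change is intended to improve code readability and reduce maintenance overhead.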

2024-11-06 21:50:09 +08:00
parent b51263664e
commit 9f5eef1f38
1 changed files with 2 additions and 2 deletions
--- a/crawl4ai/content_scrapping_strategy.py
+++ b/crawl4ai/content_scrapping_strategy.py
@@ -93,8 +93,8 @@ class CustomHTML2Text(HTML2Text):
            else:
                self.o('\n```')
                self.inside_pre = False
-        elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
+        # elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
-            pass
+        #     pass
        else:
            super().handle_tag(tag, attrs, start)