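Refactored the CustomHTML2Text class in content_scrapping_strategy.py: the branch that special-cased header tags (h1-h6) with a no-op `pass` is now commented out rather than deleted, so header tags are no longer swallowed there and instead reach the fallback `super().handle_tag(tag, attrs, start)` call. This cleanup improves code readability and reduces maintenance overhead.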

This commit is contained in:
UncleCode
2024-11-06 21:50:09 +08:00
parent b51263664e
commit 9f5eef1f38

View File

@@ -93,8 +93,8 @@ class CustomHTML2Text(HTML2Text):
else:
self.o('\n```')
self.inside_pre = False
elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
pass
# elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
# pass
else:
super().handle_tag(tag, attrs, start)