Refactored the CustomHTML2Text class in content_scrapping_strategy.py by commenting out the special-case handling branch for header tags (h1-h6). This cleanup improves code readability and reduces maintenance overhead.
This commit is contained in:
@@ -93,8 +93,8 @@ class CustomHTML2Text(HTML2Text):
|
|||||||
else:
|
else:
|
||||||
self.o('\n```')
|
self.o('\n```')
|
||||||
self.inside_pre = False
|
self.inside_pre = False
|
||||||
elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
# elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
||||||
pass
|
# pass
|
||||||
else:
|
else:
|
||||||
super().handle_tag(tag, attrs, start)
|
super().handle_tag(tag, attrs, start)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user