chore: Fix typo in chunking_strategy.py and crawler_strategy.py
The commit fixes a typo in the `chunking_strategy.py` file where `nl.toknize.TextTilingTokenizer()` was corrected to `nl.tokenize.TextTilingTokenizer()`. Additionally, in the `crawler_strategy.py` file, the commit converts the screenshot image to RGB mode before saving it as a JPEG. This ensures consistent image quality and compression.
This commit is contained in:
@@ -55,7 +55,7 @@ class TopicSegmentationChunking(ChunkingStrategy):
|
|||||||
|
|
||||||
def __init__(self, num_keywords=3, **kwargs):
|
def __init__(self, num_keywords=3, **kwargs):
|
||||||
import nltk as nl
|
import nltk as nl
|
||||||
self.tokenizer = nl.toknize.TextTilingTokenizer()
|
self.tokenizer = nl.tokenize.TextTilingTokenizer()
|
||||||
self.num_keywords = num_keywords
|
self.num_keywords = num_keywords
|
||||||
|
|
||||||
def chunk(self, text: str) -> list:
|
def chunk(self, text: str) -> list:
|
||||||
|
|||||||
@@ -292,15 +292,22 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
|||||||
# Open the screenshot with PIL
|
# Open the screenshot with PIL
|
||||||
image = Image.open(BytesIO(screenshot))
|
image = Image.open(BytesIO(screenshot))
|
||||||
|
|
||||||
|
# Convert image to RGB mode
|
||||||
|
rgb_image = image.convert('RGB')
|
||||||
|
|
||||||
# Convert to JPEG and compress
|
# Convert to JPEG and compress
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
image.save(buffered, format="JPEG", quality=85)
|
rgb_image.save(buffered, format="JPEG", quality=85)
|
||||||
img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print(f"[LOG] 📸 Screenshot taken and converted to base64")
|
print(f"[LOG] 📸 Screenshot taken and converted to base64")
|
||||||
|
|
||||||
return img_base64
|
return img_base64
|
||||||
|
except Exception as e:
|
||||||
|
if self.verbose:
|
||||||
|
print(f"[ERROR] Failed to take screenshot: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = sanitize_input_encode(f"Failed to take screenshot: {str(e)}")
|
error_message = sanitize_input_encode(f"Failed to take screenshot: {str(e)}")
|
||||||
|
|||||||
Reference in New Issue
Block a user