diff --git a/crawl4ai/chunking_strategy.py b/crawl4ai/chunking_strategy.py index 59006072..d16e4f48 100644 --- a/crawl4ai/chunking_strategy.py +++ b/crawl4ai/chunking_strategy.py @@ -55,7 +55,7 @@ class TopicSegmentationChunking(ChunkingStrategy): def __init__(self, num_keywords=3, **kwargs): import nltk as nl - self.tokenizer = nl.toknize.TextTilingTokenizer() + self.tokenizer = nl.tokenize.TextTilingTokenizer() self.num_keywords = num_keywords def chunk(self, text: str) -> list: diff --git a/crawl4ai/crawler_strategy.py b/crawl4ai/crawler_strategy.py index 85ba4450..78148f68 100644 --- a/crawl4ai/crawler_strategy.py +++ b/crawl4ai/crawler_strategy.py @@ -292,15 +292,22 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy): # Open the screenshot with PIL image = Image.open(BytesIO(screenshot)) + # Convert image to RGB mode + rgb_image = image.convert('RGB') + # Convert to JPEG and compress buffered = BytesIO() - image.save(buffered, format="JPEG", quality=85) + rgb_image.save(buffered, format="JPEG", quality=85) img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8') if self.verbose: print(f"[LOG] 📸 Screenshot taken and converted to base64") return img_base64 + except Exception as e: + if self.verbose: + print(f"[ERROR] Failed to take screenshot: {str(e)}") + return "" except Exception as e: error_message = sanitize_input_encode(f"Failed to take screenshot: {str(e)}")