Update LLMExtractionStrategy to disable chunking when specified, and add an example of summarizing a web page.

2024-06-19 19:03:35 +08:00
parent 1fcb573909
commit 21b110bfd7
2 changed files with 49 additions and 0 deletions
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -77,6 +77,9 @@ class LLMExtractionStrategy(ExtractionStrategy):
        self.chunk_token_threshold = kwargs.get("chunk_token_threshold", CHUNK_TOKEN_THRESHOLD)
        self.overlap_rate = kwargs.get("overlap_rate", OVERLAP_RATE)
        self.word_token_rate = kwargs.get("word_token_rate", WORD_TOKEN_RATE)
+        self.apply_chunking = kwargs.get("apply_chunking", True)
+        if not self.apply_chunking:
+            self.chunk_token_threshold = 1e9
        
        self.verbose = kwargs.get("verbose", False)