chore: Update configuration values for chunk token threshold, overlap rate, and minimum word threshold. Add a new example for LLMExtractionStrategy, and update the Dockerfile and README

This commit is contained in:
unclecode
2024-06-19 18:32:20 +08:00
parent 3f0e265baf
commit 539263a8ba
11 changed files with 212 additions and 130 deletions

View File

@@ -1,21 +1,22 @@
aiohttp
aiosqlite
bs4
fastapi
html2text
httpx
litellm
nltk
pydantic
python-dotenv
requests
rich
scikit-learn
selenium
uvicorn
transformers
chromedriver-autoinstaller
torch
onnxruntime
tokenizers
pillow
numpy==1.25.0
aiohttp==3.9.5
aiosqlite==0.20.0
beautifulsoup4==4.12.3
fastapi==0.111.0
html2text==2024.2.26
httpx==0.27.0
litellm==1.40.17
nltk==3.8.1
pydantic==2.7.4
python-dotenv==1.0.1
requests==2.32.3
rich==13.7.1
scikit-learn==1.5.0
selenium==4.21.0
uvicorn==0.30.1
transformers==4.41.2
chromedriver-autoinstaller==0.6.4
torch==2.3.1
onnxruntime==1.18.0
tokenizers==0.19.1
pillow==10.3.0