Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7afa11a02f | ||
|
|
dec3d44224 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -189,4 +189,6 @@ a.txt
|
|||||||
.lambda_function.py
|
.lambda_function.py
|
||||||
ec2*
|
ec2*
|
||||||
|
|
||||||
update_changelog.sh
|
update_changelog.sh
|
||||||
|
test_env/
|
||||||
|
tmp/
|
||||||
@@ -101,7 +101,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
|||||||
variable_values["REQUEST"] = self.instruction
|
variable_values["REQUEST"] = self.instruction
|
||||||
prompt_with_variables = PROMPT_EXTRACT_BLOCKS_WITH_INSTRUCTION
|
prompt_with_variables = PROMPT_EXTRACT_BLOCKS_WITH_INSTRUCTION
|
||||||
|
|
||||||
if self.extract_type == "schema":
|
if self.extract_type == "schema" and self.schema:
|
||||||
variable_values["SCHEMA"] = json.dumps(self.schema, indent=2)
|
variable_values["SCHEMA"] = json.dumps(self.schema, indent=2)
|
||||||
prompt_with_variables = PROMPT_EXTRACT_SCHEMA_WITH_INSTRUCTION
|
prompt_with_variables = PROMPT_EXTRACT_SCHEMA_WITH_INSTRUCTION
|
||||||
|
|
||||||
|
|||||||
@@ -834,7 +834,6 @@ def extract_blocks_batch(batch_data, provider = "groq/llama3-70b-8192", api_toke
|
|||||||
|
|
||||||
return sum(all_blocks, [])
|
return sum(all_blocks, [])
|
||||||
|
|
||||||
|
|
||||||
def merge_chunks_based_on_token_threshold(chunks, token_threshold):
|
def merge_chunks_based_on_token_threshold(chunks, token_threshold):
|
||||||
"""
|
"""
|
||||||
Merges small chunks into larger ones based on the total token threshold.
|
Merges small chunks into larger ones based on the total token threshold.
|
||||||
@@ -880,7 +879,6 @@ def process_sections(url: str, sections: list, provider: str, api_token: str) ->
|
|||||||
|
|
||||||
return extracted_content
|
return extracted_content
|
||||||
|
|
||||||
|
|
||||||
def wrap_text(draw, text, font, max_width):
|
def wrap_text(draw, text, font, max_width):
|
||||||
# Wrap the text to fit within the specified width
|
# Wrap the text to fit within the specified width
|
||||||
lines = []
|
lines = []
|
||||||
@@ -892,7 +890,6 @@ def wrap_text(draw, text, font, max_width):
|
|||||||
lines.append(line)
|
lines.append(line)
|
||||||
return '\n'.join(lines)
|
return '\n'.join(lines)
|
||||||
|
|
||||||
|
|
||||||
def format_html(html_string):
|
def format_html(html_string):
|
||||||
soup = BeautifulSoup(html_string, 'html.parser')
|
soup = BeautifulSoup(html_string, 'html.parser')
|
||||||
return soup.prettify()
|
return soup.prettify()
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -19,7 +19,7 @@ with open("requirements.txt") as f:
|
|||||||
requirements = f.read().splitlines()
|
requirements = f.read().splitlines()
|
||||||
|
|
||||||
# Define the requirements for different environments
|
# Define the requirements for different environments
|
||||||
default_requirements = [req for req in requirements if not req.startswith(("torch", "transformers", "onnxruntime", "nltk", "spacy", "tokenizers", "scikit-learn", "numpy"))]
|
default_requirements = [req for req in requirements if not req.startswith(("torch", "transformers", "onnxruntime", "nltk", "spacy", "tokenizers", "scikit-learn"))]
|
||||||
torch_requirements = [req for req in requirements if req.startswith(("torch", "nltk", "spacy", "scikit-learn", "numpy"))]
|
torch_requirements = [req for req in requirements if req.startswith(("torch", "nltk", "spacy", "scikit-learn", "numpy"))]
|
||||||
transformer_requirements = [req for req in requirements if req.startswith(("transformers", "tokenizers", "onnxruntime"))]
|
transformer_requirements = [req for req in requirements if req.startswith(("transformers", "tokenizers", "onnxruntime"))]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user