Refactor HTML block delimiter to use config constant
This commit is contained in:
@@ -102,6 +102,9 @@ SCREENSHOT_HEIGHT_TRESHOLD = 10000
|
||||
PAGE_TIMEOUT = 60000
|
||||
DOWNLOAD_PAGE_TIMEOUT = 60000
|
||||
|
||||
# Header template placed before each HTML example when multiple examples are
# concatenated for schema generation; fill the {index} placeholder via str.format()
|
||||
HTML_EXAMPLE_DELIMITER = "=== HTML EXAMPLE {index} ==="
|
||||
|
||||
# Global user settings with descriptions and default values
|
||||
USER_SETTINGS = {
|
||||
"DEFAULT_LLM_PROVIDER": {
|
||||
|
||||
@@ -13,6 +13,7 @@ from .config import (
|
||||
CHUNK_TOKEN_THRESHOLD,
|
||||
OVERLAP_RATE,
|
||||
WORD_TOKEN_RATE,
|
||||
HTML_EXAMPLE_DELIMITER,
|
||||
)
|
||||
from .utils import * # noqa: F403
|
||||
|
||||
@@ -1488,7 +1489,8 @@ In this scenario, use your best judgment to generate the schema. You need to exa
|
||||
attr_value_threshold=500,
|
||||
max_size=500_000
|
||||
)
|
||||
html_parts.append(f"'''html example {i}\n{cleaned}\n'''")
|
||||
header = HTML_EXAMPLE_DELIMITER.format(index=i)
|
||||
html_parts.append(f"{header}\n{cleaned}")
|
||||
html = "\n\n".join(html_parts)
|
||||
|
||||
# Preprocess HTML for schema generation (skip if already preprocessed from multiple URLs)
|
||||
|
||||
Reference in New Issue
Block a user