Refactor HTML block delimiter to use config constant
This commit is contained in:
@@ -102,6 +102,9 @@ SCREENSHOT_HEIGHT_TRESHOLD = 10000
|
|||||||
PAGE_TIMEOUT = 60000
|
PAGE_TIMEOUT = 60000
|
||||||
DOWNLOAD_PAGE_TIMEOUT = 60000
|
DOWNLOAD_PAGE_TIMEOUT = 60000
|
||||||
|
|
||||||
|
# Header template (formatted with `index`) placed before each HTML example when multiple examples are concatenated for schema generation
|
||||||
|
HTML_EXAMPLE_DELIMITER = "=== HTML EXAMPLE {index} ==="
|
||||||
|
|
||||||
# Global user settings with descriptions and default values
|
# Global user settings with descriptions and default values
|
||||||
USER_SETTINGS = {
|
USER_SETTINGS = {
|
||||||
"DEFAULT_LLM_PROVIDER": {
|
"DEFAULT_LLM_PROVIDER": {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from .config import (
|
|||||||
CHUNK_TOKEN_THRESHOLD,
|
CHUNK_TOKEN_THRESHOLD,
|
||||||
OVERLAP_RATE,
|
OVERLAP_RATE,
|
||||||
WORD_TOKEN_RATE,
|
WORD_TOKEN_RATE,
|
||||||
|
HTML_EXAMPLE_DELIMITER,
|
||||||
)
|
)
|
||||||
from .utils import * # noqa: F403
|
from .utils import * # noqa: F403
|
||||||
|
|
||||||
@@ -1488,7 +1489,8 @@ In this scenario, use your best judgment to generate the schema. You need to exa
|
|||||||
attr_value_threshold=500,
|
attr_value_threshold=500,
|
||||||
max_size=500_000
|
max_size=500_000
|
||||||
)
|
)
|
||||||
html_parts.append(f"'''html example {i}\n{cleaned}\n'''")
|
header = HTML_EXAMPLE_DELIMITER.format(index=i)
|
||||||
|
html_parts.append(f"{header}\n{cleaned}")
|
||||||
html = "\n\n".join(html_parts)
|
html = "\n\n".join(html_parts)
|
||||||
|
|
||||||
# Preprocess HTML for schema generation (skip if already preprocessed from multiple URLs)
|
# Preprocess HTML for schema generation (skip if already preprocessed from multiple URLs)
|
||||||
|
|||||||
Reference in New Issue
Block a user