Refactor HTML block delimiter to use config constant

This commit is contained in:
unclecode
2026-01-24 04:19:50 +00:00
parent 2d5e5306c5
commit 79ebfce913
2 changed files with 6 additions and 1 deletions

View File

@@ -102,6 +102,9 @@ SCREENSHOT_HEIGHT_TRESHOLD = 10000
# NOTE(review): units for the two timeouts are not shown here — presumably
# milliseconds; confirm against the code that consumes them.
PAGE_TIMEOUT = 60000
DOWNLOAD_PAGE_TIMEOUT = 60000
# Header template placed before each HTML example when several examples are
# concatenated into one string for schema generation. It is a str.format
# template: the caller fills the `{index}` placeholder with the example's
# index via `HTML_EXAMPLE_DELIMITER.format(index=...)`.
HTML_EXAMPLE_DELIMITER = "=== HTML EXAMPLE {index} ==="
# Global user settings with descriptions and default values
USER_SETTINGS = {
"DEFAULT_LLM_PROVIDER": {

View File

@@ -13,6 +13,7 @@ from .config import (
CHUNK_TOKEN_THRESHOLD,
OVERLAP_RATE,
WORD_TOKEN_RATE,
HTML_EXAMPLE_DELIMITER,
)
from .utils import * # noqa: F403
@@ -1488,7 +1489,8 @@ In this scenario, use your best judgment to generate the schema. You need to exa
attr_value_threshold=500,
max_size=500_000
)
html_parts.append(f"'''html example {i}\n{cleaned}\n'''")
header = HTML_EXAMPLE_DELIMITER.format(index=i)
html_parts.append(f"{header}\n{cleaned}")
html = "\n\n".join(html_parts)
# Preprocess HTML for schema generation (skip if already preprocessed from multiple URLs)