fix: Check for raw: and raw:// URLs before auto-appending https:// prefix

- Add raw HTML URL validation alongside http/https checks
- Fix URL preprocessing logic to handle raw: and raw:// prefixes
- Update error message and add comprehensive test cases
This commit is contained in:
Soham Kukreti
2025-08-11 22:10:53 +05:30
parent f0ce7b2710
commit f30811b524
4 changed files with 42 additions and 11 deletions

View File

@@ -237,9 +237,9 @@ async def get_markdown(
body: MarkdownRequest,
_td: Dict = Depends(token_dep),
):
if not body.url.startswith(("http://", "https://")):
if not body.url.startswith(("http://", "https://")) and not body.url.startswith(("raw:", "raw://")):
raise HTTPException(
400, "URL must be absolute and start with http/https")
400, "Invalid URL format. Must start with http://, https://, or for raw HTML (raw:, raw://)")
markdown = await handle_markdown_request(
body.url, body.f, body.q, body.c, config, body.provider
)
@@ -401,7 +401,7 @@ async def llm_endpoint(
):
if not q:
raise HTTPException(400, "Query parameter 'q' is required")
if not url.startswith(("http://", "https://")):
if not url.startswith(("http://", "https://")) and not url.startswith(("raw:", "raw://")):
url = "https://" + url
answer = await handle_llm_qa(url, q, config)
return JSONResponse({"answer": answer})