feat(api): improve cache handling and add API tests

Changes the cache mode used when caching is disabled from BYPASS to WRITE_ONLY, so that results are still written to the cache for future use. Also adds error handling for non-JSON LLM responses and a comprehensive API test suite.

- Changes default cache fallback from BYPASS to WRITE_ONLY
- Adds error handling for LLM JSON parsing
- Introduces new test suite for API endpoints
2025-02-02 20:53:31 +08:00
parent 33a21d6a7a
commit 04bc643cec
2 changed files with 152 additions and 3 deletions
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -51,7 +51,7 @@ async def process_llm_extraction(
            schema=json.loads(schema) if schema else None,
        )

-        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.BYPASS
+        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.WRITE_ONLY

        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(
@@ -70,7 +70,10 @@ async def process_llm_extraction(
            })
            return

-        content = json.loads(result.extracted_content)
+        try:
+            content = json.loads(result.extracted_content)
+        except json.JSONDecodeError:
+            content = result.extracted_content
        await redis.hset(f"task:{task_id}", mapping={
            "status": TaskStatus.COMPLETED,
            "result": json.dumps(content)
@@ -110,7 +113,7 @@ async def handle_markdown_request(
            }[filter_type]
            md_generator = DefaultMarkdownGenerator(content_filter=content_filter)

-        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.BYPASS
+        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.WRITE_ONLY

        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(