From b0b2b2761ca36ee072536cc0982a1a67e2ed216f Mon Sep 17 00:00:00 2001 From: Aravind Karnam Date: Tue, 9 Dec 2025 15:23:56 +0530 Subject: [PATCH] fix: Make JsonCssExtractionStrategy.generate_schema resilient to Markdown code fences generated by LLMs https://github.com/unclecode/crawl4ai/issues/1663 --- crawl4ai/extraction_strategy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index 7033e380..fcb73e5f 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -1378,9 +1378,10 @@ In this scenario, use your best judgment to generate the schema. You need to exa base_url=llm_config.base_url, extra_args=kwargs ) - + # Simply strip the markdown formatting + raw_json = response.choices[0].message.content.replace('```json\n', '').replace('\n```', '') # Extract and return schema - return json.loads(response.choices[0].message.content) + return json.loads(raw_json) except Exception as e: raise Exception(f"Failed to generate schema: {str(e)}")