fix: Make JsonCssExtractionStrategy.generate_schema resilient to Markdown code fences (```json ... ```) that LLMs wrap around the generated schema (https://github.com/unclecode/crawl4ai/issues/1663)

This commit is contained in:
Aravind Karnam
2025-12-09 15:23:56 +05:30
parent 9672afded2
commit b0b2b2761c

View File

@@ -1378,9 +1378,10 @@ In this scenario, use your best judgment to generate the schema. You need to exa
base_url=llm_config.base_url, base_url=llm_config.base_url,
extra_args=kwargs extra_args=kwargs
) )
# Simply strip the markdown formatting
raw_json = response.choices[0].message.content.replace('```json\n', '').replace('\n```', '')
# Extract and return schema # Extract and return schema
return json.loads(response.choices[0].message.content) return json.loads(raw_json)
except Exception as e: except Exception as e:
raise Exception(f"Failed to generate schema: {str(e)}") raise Exception(f"Failed to generate schema: {str(e)}")