Sometimes the JSON output from an LLM ends with a stray closing code fence, like this: {"key": "value"}``` — which the regex used by LangChain's JsonOutputParser cannot parse. That regex lives in langchain_core.output_parsers.json.parse_json_markdown(), so we simply monkey-patch that function.
# monkey_patch.py
from typing import Callable, Any
from langchain_core.output_parsers.json import _custom_parser, parse_partial_json
import langchain_core.output_parsers.json as lc_op_j
import re
def parse_json_markdown(
    json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
    """
    Parse a JSON string from a Markdown string.

    Patched variant: besides a fenced block, it accepts bare JSON that is
    followed by a stray closing fence, e.g. '{"key": "value"}```'.

    Two candidates are tried in order:

    1. The whole string, minus a fence and/or "json" tag that opens it.
    2. Only if the parser rejects candidate 1 with ``ValueError``: the text
       after the first fence found later in the string (fence preceded by
       whitespace or prose).

    Args:
        json_string: The Markdown string.
        parser: Callable that converts the cleaned JSON text into a Python
            object.

    Returns:
        The parsed JSON object as a Python dictionary.

    Raises:
        ValueError: Re-raised from the first candidate when the fallback
            candidate does not exist or is rejected as well.
            (``json.JSONDecodeError`` is a ``ValueError`` subclass.)
    """

    def _clean_and_parse(candidate: str) -> Any:
        # Strip whitespace and newlines from both ends, then leftover backticks
        cleaned = candidate.strip().strip("`")
        # handle newlines and other special characters inside the returned value
        cleaned = _custom_parser(cleaned)
        # Parse the JSON string into a Python object
        return parser(cleaned)

    # Every part of this pattern is optional, so it always matches at offset 0
    # (the result is never None). It drops a fence / "json" tag only when they
    # are the very first characters and keeps everything after them.
    leading = re.search(r"(?:```)?(json)?(.*)", json_string, re.DOTALL)  # --------> patch
    try:
        return _clean_and_parse(leading.group(2))
    except ValueError:
        # The fence may open later in the string (leading whitespace or prose);
        # the pattern above cannot skip ahead to it, so search for it here.
        fenced = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
        # A fence at offset 0 yields the same candidate that just failed.
        if fenced is not None and fenced.start() > 0:
            try:
                return _clean_and_parse(fenced.group(2))
            except ValueError:
                # Deliberate: report the failure of the primary attempt below.
                pass
        raise
# Install the patch: rebind the attribute on the langchain_core.output_parsers.json
# module object, so lookups of parse_json_markdown through that module at call
# time resolve to the function defined above. This runs as an import side effect.
# NOTE(review): names already bound elsewhere via
# `from ... import parse_json_markdown` keep the original function - confirm
# no such callers matter.
lc_op_j.parse_json_markdown = parse_json_markdown
Here is the test:
import unittest
import monkey_patch as _
from langchain_core.output_parsers.json import JsonOutputParser
class TestMonkeyPatch(unittest.TestCase):
    """Check JsonOutputParser against fenced, unfenced and truncated payloads."""

    def test_json_output_parse(self):
        """Each payload must parse to a mapping whose "key" entry is "value".

        Every payload runs in its own subTest, so a failure names the
        offending input and the remaining payloads are still exercised.
        """
        payloads = [
            '{"key": "value"',
            '```{"key": "value"}```',
            '```{"key": "value"}',
            '```{"key": "value"',
            '```{"key": "value"```',
            '```json{"key": "value"}```',
            '```json{"key": "value"}',
            '```json{"key": "value"',
            '```json{"key": "value"```',
            '```{"key": "value", "key2": "value2"```',
            '{"key": "value"}```',  # ----> monkeypatch for langchain
        ]
        for payload in payloads:
            with self.subTest(payload=payload):
                # A fresh parser per payload, as in the original test.
                result = JsonOutputParser().parse(payload)
                self.assertEqual("value", result["key"])