Langchain JsonOutputParser Invalid json output Error

Sometimes the JSON output from an LLM looks like this: {"key": "value"}``` (a stray closing code fence with no opening one), which the regex used by LangChain's JsonOutputParser can't parse. The regex lives in langchain_core.output_parsers.json.parse_json_markdown(), so we simply monkey-patch that function.

# monkey_patch.py
from typing import Callable, Any
from langchain_core.output_parsers.json import _custom_parser, parse_partial_json
import langchain_core.output_parsers.json as lc_op_j
import re

# Markdown code-fence delimiter.
_FENCE = "```"

# Prefixes that show the payload (or its fence / "json" tag) already starts the
# text, i.e. there is no leading prose that has to be skipped.
_PAYLOAD_PREFIXES = (_FENCE, "json", "{", "[", '"')


def parse_json_markdown(
    json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
    """
    Parse a JSON payload out of a (possibly malformed) Markdown string.

    Accepted shapes include a bare JSON value, a value wrapped in a complete
    ```` ```json ... ``` ```` fence, a value with only an opening fence, a
    value with only a stray closing fence, and a fenced value preceded by
    whitespace or introductory prose.

    Args:
        json_string: The Markdown string.
        parser: Callable applied to the cleaned-up JSON text. Defaults to
            ``parse_partial_json``.

    Returns:
        Whatever ``parser`` returns for the extracted text (a dictionary for
        a JSON object).
    """
    # Strip surrounding whitespace first so a leading newline or space cannot
    # hide the opening fence from the prefix checks below.
    text = json_string.strip()

    # If something other than the payload starts the text, look for an opening
    # fence further in. The fence only counts as "opening" when real content
    # follows it; otherwise it is a stray closing fence and the payload is the
    # text in front of it. Texts that already start with a JSON opener are
    # left alone so backticks inside JSON string values are not mistaken for
    # a fence.
    if not text.startswith(_PAYLOAD_PREFIXES):
        _, fence, fenced = text.partition(_FENCE)
        if fence and fenced.strip().strip("`").strip():
            text = fence + fenced

    # Drop the optional opening fence and the optional "json" language tag.
    if text.startswith(_FENCE):
        text = text[len(_FENCE):]
    if text.startswith("json"):
        text = text[len("json"):]

    # Remove surrounding whitespace, then any closing-fence backticks, then
    # the whitespace that sat between the payload and that closing fence.
    json_str = text.strip().strip("`").strip()

    # Handle newlines and other special characters inside the returned value.
    json_str = _custom_parser(json_str)

    # Hand the cleaned text to the configured parser.
    return parser(json_str)

# Install the replacement on the imported langchain module object. Code that
# looks up `parse_json_markdown` through that module's namespace at call time
# will now get the patched function; names bound earlier elsewhere via
# `from ... import parse_json_markdown` keep pointing at the original.
setattr(lc_op_j, "parse_json_markdown", parse_json_markdown)

Here is the test:

Continue reading “Langchain JsonOutputParser Invalid json output Error”