From 4ef06b77291c485bff9645aa5b13196ec03234c8 Mon Sep 17 00:00:00 2001 From: David Tam Date: Tue, 16 Dec 2025 15:58:13 -0800 Subject: [PATCH 1/5] add optimistic json parse to avoid corner case in embedded code blocks in json --- guardrails/utils/parsing_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/guardrails/utils/parsing_utils.py b/guardrails/utils/parsing_utils.py index 11163549e..b483b3abd 100644 --- a/guardrails/utils/parsing_utils.py +++ b/guardrails/utils/parsing_utils.py @@ -75,6 +75,15 @@ def get_code_block( def extract_json_from_ouput( output: str, ) -> Tuple[Optional[Union[Dict, List]], Optional[Exception]]: + # try to load the whole output as json first + # there can be corner cases with code blocks + # and json/codeblocks inside json + try: + output_as_dict = json.loads(output, strict=False) + return output_as_dict, None + except json.decoder.JSONDecodeError: + pass + # Find and extract json from code blocks extracted_code_block = output has_json_block, json_start, json_end = has_code_block(output, "json") From 6dc052173a19e6ec251ca3a100792c8d269114b7 Mon Sep 17 00:00:00 2001 From: David Tam Date: Tue, 16 Dec 2025 16:00:54 -0800 Subject: [PATCH 2/5] add test --- tests/unit_tests/utils/test_json_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit_tests/utils/test_json_utils.py b/tests/unit_tests/utils/test_json_utils.py index b34e13efb..fea011a2a 100644 --- a/tests/unit_tests/utils/test_json_utils.py +++ b/tests/unit_tests/utils/test_json_utils.py @@ -75,6 +75,11 @@ not_even_json = "This isn't even json..." 
+codeblock_inside_json = """ +{ + "data": "Here is a code block: ```json {\"a\": 1}```" +} +""" @pytest.mark.parametrize( "llm_ouput,expected_output,expected_error", @@ -84,6 +89,7 @@ (no_code_block, {"a": 1}, None), (text_with_no_code_block, {"a": 1, "b": {"c": [{"d": 2}, {"e": 3}]}}, None), (text_with_json_code_block, {"a": 1}, None), + (codeblock_inside_json, {"data": "Here is a code block: ```json {\"a\": 1}```"}, None), (js_code_block, None, "Expecting value: line 1 column 1 (char 0)"), ( invalid_json_code_block__quotes, From 5eeae6ee03ae072e24fb3f00da69c773666d1b6e Mon Sep 17 00:00:00 2001 From: David Tam Date: Tue, 16 Dec 2025 16:03:10 -0800 Subject: [PATCH 3/5] lint --- guardrails/utils/parsing_utils.py | 2 +- tests/unit_tests/utils/test_json_utils.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/guardrails/utils/parsing_utils.py b/guardrails/utils/parsing_utils.py index b483b3abd..ada4bd17b 100644 --- a/guardrails/utils/parsing_utils.py +++ b/guardrails/utils/parsing_utils.py @@ -76,7 +76,7 @@ def extract_json_from_ouput( output: str, ) -> Tuple[Optional[Union[Dict, List]], Optional[Exception]]: # try to load the whole output as json first - # there can be corner cases with code blocks + # there can be corner cases with code blocks # and json/codeblocks inside json try: output_as_dict = json.loads(output, strict=False) diff --git a/tests/unit_tests/utils/test_json_utils.py b/tests/unit_tests/utils/test_json_utils.py index fea011a2a..8c7181192 100644 --- a/tests/unit_tests/utils/test_json_utils.py +++ b/tests/unit_tests/utils/test_json_utils.py @@ -81,6 +81,7 @@ } """ + @pytest.mark.parametrize( "llm_ouput,expected_output,expected_error", [ @@ -89,7 +90,11 @@ (no_code_block, {"a": 1}, None), (text_with_no_code_block, {"a": 1, "b": {"c": [{"d": 2}, {"e": 3}]}}, None), (text_with_json_code_block, {"a": 1}, None), - (codeblock_inside_json, {"data": "Here is a code block: ```json {\"a\": 1}```"}, None), + ( + codeblock_inside_json, + 
{"data": 'Here is a code block: ```json {"a": 1}```'}, + None, + ), (js_code_block, None, "Expecting value: line 1 column 1 (char 0)"), ( invalid_json_code_block__quotes, From 494e53abcd7719fe80e3132b61d360548a34c65a Mon Sep 17 00:00:00 2001 From: David Tam Date: Tue, 16 Dec 2025 16:27:08 -0800 Subject: [PATCH 4/5] fix test --- tests/unit_tests/utils/test_json_utils.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/unit_tests/utils/test_json_utils.py b/tests/unit_tests/utils/test_json_utils.py index 8c7181192..bafb40bac 100644 --- a/tests/unit_tests/utils/test_json_utils.py +++ b/tests/unit_tests/utils/test_json_utils.py @@ -1,5 +1,5 @@ import pytest - +import json from guardrails.utils.parsing_utils import extract_json_from_ouput @@ -75,12 +75,9 @@ not_even_json = "This isn't even json..." -codeblock_inside_json = """ -{ - "data": "Here is a code block: ```json {\"a\": 1}```" -} -""" - +codeblock_inside_json = json.dumps({ + "data": 'hello ```json\n{\"foo\":\"<...>\"}\n```' + }) @pytest.mark.parametrize( "llm_ouput,expected_output,expected_error", @@ -92,7 +89,7 @@ (text_with_json_code_block, {"a": 1}, None), ( codeblock_inside_json, - {"data": 'Here is a code block: ```json {"a": 1}```'}, + {"data": 'hello ```json\n{\"foo\":\"<...>\"}\n```'}, None, ), (js_code_block, None, "Expecting value: line 1 column 1 (char 0)"), From 13549bf6608c3522f1cd3bc6a2c9044b16bb6a5f Mon Sep 17 00:00:00 2001 From: David Tam Date: Tue, 16 Dec 2025 16:29:50 -0800 Subject: [PATCH 5/5] more lint --- tests/unit_tests/utils/test_json_utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/unit_tests/utils/test_json_utils.py b/tests/unit_tests/utils/test_json_utils.py index bafb40bac..331077928 100644 --- a/tests/unit_tests/utils/test_json_utils.py +++ b/tests/unit_tests/utils/test_json_utils.py @@ -75,9 +75,8 @@ not_even_json = "This isn't even json..." 
-codeblock_inside_json = json.dumps({ - "data": 'hello ```json\n{\"foo\":\"<...>\"}\n```' - }) +codeblock_inside_json = json.dumps({"data": 'hello ```json\n{"foo":"<...>"}\n```'}) + @pytest.mark.parametrize( "llm_ouput,expected_output,expected_error", @@ -89,7 +88,7 @@ (text_with_json_code_block, {"a": 1}, None), ( codeblock_inside_json, - {"data": 'hello ```json\n{\"foo\":\"<...>\"}\n```'}, + {"data": 'hello ```json\n{"foo":"<...>"}\n```'}, None, ), (js_code_block, None, "Expecting value: line 1 column 1 (char 0)"),