From 4bb74a6ab15f6fb906dc2cc5bf12894ffb7f8d44 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Mon, 29 Jun 2026 12:48:18 +1000 Subject: [PATCH] MDEV-28922 JSON_NORMALIZE handling of duplicate keys Despite the JSON spec rfc8259/#section-4 "The names within an object SHOULD be unique"[1], it does happen. As our sort algorithm has access to both key and value, if the key matches, sort by the value. The type and length of objects, strings, arrays and numbers are easy to compare, so use those as a first comparison point. [1] https://www.rfc-editor.org/info/rfc8259/#section-4 --- mysql-test/main/json_normalize.result | 149 ++++++++++++++++++++++++++ mysql-test/main/json_normalize.test | 126 ++++++++++++++++++++++ strings/json_normalize.c | 87 ++++++++++++++- 3 files changed, 359 insertions(+), 3 deletions(-) diff --git a/mysql-test/main/json_normalize.result b/mysql-test/main/json_normalize.result index b1363ea909959..45a8e0f6d6285 100644 --- a/mysql-test/main/json_normalize.result +++ b/mysql-test/main/json_normalize.result @@ -75,3 +75,152 @@ select concat_ws(' ', json_normalize(t1.text), json_normalize(t1.text)) from t1; concat_ws(' ', json_normalize(t1.text), json_normalize(t1.text)) 0.0E0 0.0E0 drop table t1; +# +# MDEV-28922 JSON_NORMALIZE handling of duplicate keys +# +SET @j = '{"x": 1, "y": 2, "z": 3, "b": 0, "c": 5, "b":3, "a": 4, "b": 1, "a": 3, "a": 2, "a": 1, "b": 2}'; +SELECT @k := JSON_NORMALIZE(@j); +@k := JSON_NORMALIZE(@j) +{"a":1.0E0,"a":2.0E0,"a":3.0E0,"a":4.0E0,"b":0.0E0,"b":1.0E0,"b":2.0E0,"b":3.0E0,"c":5.0E0,"x":1.0E0,"y":2.0E0,"z":3.0E0} +SELECT JSON_EXTRACT(@j, '$.b') as j1, JSON_EXTRACT(@k, '$.b') as j2; +j1 j2 +0 0.0E0 +Primitives +SET @j='{"a": null, "a": true, "a": false, "a": 123, "a": "string"}'; +SELECT JSON_NORMALIZE(@j); +JSON_NORMALIZE(@j) +{"a":"string","a":1.23E2,"a":true,"a":false,"a":null} +SET @j2='{"a": "string", "a": 123, "a": false, "a": true, "a": null}'; +SELECT JSON_NORMALIZE(@j2); +JSON_NORMALIZE(@j2) 
+{"a":"string","a":1.23E2,"a":true,"a":false,"a":null} +SELECT JSON_EQUALS(@j, @j2) je; +je +1 +Objects +SET @j='{ + "a": {}, + "a": {"x":1}, + "a": {"x":1,"y":2}, + "a": {"x":1,"y":2,"z":3} +}'; +SELECT JSON_NORMALIZE(@j); +JSON_NORMALIZE(@j) +{"a":{},"a":{"x":1.0E0},"a":{"x":1.0E0,"y":2.0E0},"a":{"x":1.0E0,"y":2.0E0,"z":3.0E0}} +SET @j2='{ + "a": {"x":1,"y":2,"z":3}, + "a": {"x":1}, + "a": {}, + "a": {"x":1,"y":2} +}'; +SELECT JSON_NORMALIZE(@j2); +JSON_NORMALIZE(@j2) +{"a":{},"a":{"x":1.0E0},"a":{"x":1.0E0,"y":2.0E0},"a":{"x":1.0E0,"y":2.0E0,"z":3.0E0}} +SELECT JSON_EQUALS(@j, @j2) as je; +je +1 +Arrays +SET @j='{ + "a": [], + "a": [1], + "a": [1,2], + "a": [1,2,3], + "a": [1,2,3,4] +}'; +SELECT JSON_NORMALIZE(@j); +JSON_NORMALIZE(@j) +{"a":[],"a":[1.0E0],"a":[1.0E0,2.0E0],"a":[1.0E0,2.0E0,3.0E0],"a":[1.0E0,2.0E0,3.0E0,4.0E0]} +SET @j2='{ + "a": [1,2,3], + "a": [], + "a": [1,2,3,4], + "a": [1], + "a": [1,2] +}'; +SELECT JSON_NORMALIZE(@j2); +JSON_NORMALIZE(@j2) +{"a":[],"a":[1.0E0],"a":[1.0E0,2.0E0],"a":[1.0E0,2.0E0,3.0E0],"a":[1.0E0,2.0E0,3.0E0,4.0E0]} +SELECT JSON_EQUALS(@j, @j2) je; +je +1 +Mixed Arrays +SET @j='{ + "a": [], + "a": [null], + "a": [1,"x",true], + "a": [{"k":1}], + "a": [[1,2],[3,4]] +}'; +SELECT JSON_NORMALIZE(@j); +JSON_NORMALIZE(@j) +{"a":[],"a":[{"k":1.0E0}],"a":[null],"a":[[1.0E0,2.0E0],[3.0E0,4.0E0]],"a":[1.0E0,"x",true]} +SET @j2='{ + "a": [[1,2],[3,4]], + "a": [{"k":1}], + "a": [], + "a": [1,"x",true], + "a": [null] +}'; +SELECT JSON_NORMALIZE(@j2); +JSON_NORMALIZE(@j2) +{"a":[],"a":[{"k":1.0E0}],"a":[null],"a":[[1.0E0,2.0E0],[3.0E0,4.0E0]],"a":[1.0E0,"x",true]} +SELECT JSON_EQUALS(@j, @j2) je; +je +1 +Nested objects +SET @j='{ + "a": {"x":{"y":1}}, + "a": {"x":{"y":[1,2]}}, + "a": {"x":{"y":{"z":3}}}, + "a": {"x":{}} +}'; +SELECT JSON_NORMALIZE(@j); +JSON_NORMALIZE(@j) +{"a":{"x":{}},"a":{"x":{"y":{"z":3.0E0}}},"a":{"x":{"y":[1.0E0,2.0E0]}},"a":{"x":{"y":1.0E0}}} +SET @j2='{ + "a": {"x":{}}, + "a": {"x":{"y":{"z":3}}}, + "a": 
{"x":{"y":1}}, + "a": {"x":{"y":[1,2]}} +}'; +SELECT JSON_NORMALIZE(@j2); +JSON_NORMALIZE(@j2) +{"a":{"x":{}},"a":{"x":{"y":{"z":3.0E0}}},"a":{"x":{"y":[1.0E0,2.0E0]}},"a":{"x":{"y":1.0E0}}} +SELECT JSON_EQUALS(@j, @j2) je; +je +1 +Everything mixed +SET @j='{ + "a": null, + "a": 1, + "a": "abc", + "a": true, + "a": [], + "a": [1], + "a": [1,2], + "a": {}, + "a": {"k":1}, + "a": {"k":1,"m":2} +}'; +SELECT JSON_NORMALIZE(@j); +JSON_NORMALIZE(@j) +{"a":{},"a":{"k":1.0E0},"a":{"k":1.0E0,"m":2.0E0},"a":[],"a":[1.0E0],"a":[1.0E0,2.0E0],"a":"abc","a":1.0E0,"a":true,"a":null} +SET @j2='{ + "a": {"k":1}, + "a": [], + "a": true, + "a": {"k":1,"m":2}, + "a": "abc", + "a": null, + "a": [1,2], + "a": 1, + "a": {}, + "a": [1] +}'; +SELECT JSON_NORMALIZE(@j2); +JSON_NORMALIZE(@j2) +{"a":{},"a":{"k":1.0E0},"a":{"k":1.0E0,"m":2.0E0},"a":[],"a":[1.0E0],"a":[1.0E0,2.0E0],"a":"abc","a":1.0E0,"a":true,"a":null} +SELECT JSON_EQUALS(@j, @j2) je; +je +1 +# End of 10.11 tests diff --git a/mysql-test/main/json_normalize.test b/mysql-test/main/json_normalize.test index 29faa514ccad0..a1c0b12026d57 100644 --- a/mysql-test/main/json_normalize.test +++ b/mysql-test/main/json_normalize.test @@ -56,3 +56,129 @@ select concat_ws(' ', t1.text, t1.text) from t1; select concat_ws(' ', json_normalize(t1.text), json_normalize(t1.text)) from t1; drop table t1; + +--echo # +--echo # MDEV-28922 JSON_NORMALIZE handling of duplicate keys +--echo # + +SET @j = '{"x": 1, "y": 2, "z": 3, "b": 0, "c": 5, "b":3, "a": 4, "b": 1, "a": 3, "a": 2, "a": 1, "b": 2}'; +SELECT @k := JSON_NORMALIZE(@j); +SELECT JSON_EXTRACT(@j, '$.b') as j1, JSON_EXTRACT(@k, '$.b') as j2; + +--echo Primitives + +SET @j='{"a": null, "a": true, "a": false, "a": 123, "a": "string"}'; +SELECT JSON_NORMALIZE(@j); + +SET @j2='{"a": "string", "a": 123, "a": false, "a": true, "a": null}'; +SELECT JSON_NORMALIZE(@j2); +SELECT JSON_EQUALS(@j, @j2) je; + +--echo Objects +SET @j='{ + "a": {}, + "a": {"x":1}, + "a": {"x":1,"y":2}, + "a": 
{"x":1,"y":2,"z":3} +}'; +SELECT JSON_NORMALIZE(@j); + +SET @j2='{ + "a": {"x":1,"y":2,"z":3}, + "a": {"x":1}, + "a": {}, + "a": {"x":1,"y":2} +}'; +SELECT JSON_NORMALIZE(@j2); +SELECT JSON_EQUALS(@j, @j2) as je; + +--echo Arrays + +SET @j='{ + "a": [], + "a": [1], + "a": [1,2], + "a": [1,2,3], + "a": [1,2,3,4] +}'; +SELECT JSON_NORMALIZE(@j); + +SET @j2='{ + "a": [1,2,3], + "a": [], + "a": [1,2,3,4], + "a": [1], + "a": [1,2] +}'; +SELECT JSON_NORMALIZE(@j2); +SELECT JSON_EQUALS(@j, @j2) je; + +--echo Mixed Arrays +SET @j='{ + "a": [], + "a": [null], + "a": [1,"x",true], + "a": [{"k":1}], + "a": [[1,2],[3,4]] +}'; +SELECT JSON_NORMALIZE(@j); + +SET @j2='{ + "a": [[1,2],[3,4]], + "a": [{"k":1}], + "a": [], + "a": [1,"x",true], + "a": [null] +}'; +SELECT JSON_NORMALIZE(@j2); +SELECT JSON_EQUALS(@j, @j2) je; + +--echo Nested objects +SET @j='{ + "a": {"x":{"y":1}}, + "a": {"x":{"y":[1,2]}}, + "a": {"x":{"y":{"z":3}}}, + "a": {"x":{}} +}'; +SELECT JSON_NORMALIZE(@j); + +SET @j2='{ + "a": {"x":{}}, + "a": {"x":{"y":{"z":3}}}, + "a": {"x":{"y":1}}, + "a": {"x":{"y":[1,2]}} +}'; +SELECT JSON_NORMALIZE(@j2); +SELECT JSON_EQUALS(@j, @j2) je; + +--echo Everything mixed +SET @j='{ + "a": null, + "a": 1, + "a": "abc", + "a": true, + "a": [], + "a": [1], + "a": [1,2], + "a": {}, + "a": {"k":1}, + "a": {"k":1,"m":2} +}'; +SELECT JSON_NORMALIZE(@j); + +SET @j2='{ + "a": {"k":1}, + "a": [], + "a": true, + "a": {"k":1,"m":2}, + "a": "abc", + "a": null, + "a": [1,2], + "a": 1, + "a": {}, + "a": [1] +}'; +SELECT JSON_NORMALIZE(@j2); +SELECT JSON_EQUALS(@j, @j2) je; + +--echo # End of 10.11 tests diff --git a/strings/json_normalize.c b/strings/json_normalize.c index a7849d95b3cf4..7854ecb84c02b 100644 --- a/strings/json_normalize.c +++ b/strings/json_normalize.c @@ -308,12 +308,93 @@ json_norm_value_string_init(struct json_norm_value *val, } +static int json_norm_kv_comp(const void *a_, const void *b_); + +static int json_norm_value_comp(const struct json_norm_value *a, + const struct 
json_norm_value *b) +{ + if (a->type != b->type) + return (int) a->type - b->type; + + switch (a->type) + { + case JSON_VALUE_OBJECT: + { + const DYNAMIC_ARRAY *ao= &a->value.object.kv_pairs; + const DYNAMIC_ARRAY *bo= &b->value.object.kv_pairs; + int ret; + if (ao->elements != bo->elements) + return ao->elements < bo->elements ? -1 : 1; + + for (size_t i= 0; i < ao->elements; ++i) + { + const struct json_norm_kv *akv= + dynamic_element(ao, i, struct json_norm_kv*); + const struct json_norm_kv *bkv= + dynamic_element(bo, i, struct json_norm_kv*); + if ((ret= json_norm_kv_comp(akv, bkv))) + return ret; + } + return 0; + } + case JSON_VALUE_ARRAY: + { + const DYNAMIC_ARRAY *aa= &a->value.array.values; + const DYNAMIC_ARRAY *ba= &b->value.array.values; + int ret; + + if (aa->elements != ba->elements) + return aa->elements < ba->elements ? -1 : 1; + + for (size_t i= 0; i < aa->elements; ++i) + { + const struct json_norm_value *aval= + dynamic_element(aa, i, struct json_norm_value*); + const struct json_norm_value *bval= + dynamic_element(ba, i, struct json_norm_value*); + if ((ret= json_norm_value_comp(aval, bval))) + return ret; + } + return 0; + } + case JSON_VALUE_STRING: + { + const LEX_STRING *as= &a->value.string; + const LEX_STRING *bs= &b->value.string; + if (as->length != bs->length) + return as->length < bs->length ? -1 : 1; + + return my_strnncoll(&my_charset_utf8mb4_bin, + (const uchar *)as->str, as->length, + (const uchar *)bs->str, bs->length); + } + case JSON_VALUE_NUMBER: + { + const DYNAMIC_STRING *anum= &a->value.number; + const DYNAMIC_STRING *bnum= &b->value.number; + if (anum->length != bnum->length) + return anum->length < bnum->length ? 
-1 : 1; + return strncmp(anum->str, bnum->str, anum->length); + } + case JSON_VALUE_NULL: + case JSON_VALUE_TRUE: + case JSON_VALUE_FALSE: + case JSON_VALUE_UNINITIALIZED: + default: + return 0; + } +} + + static int json_norm_kv_comp(const void *a_, const void *b_) { const struct json_norm_kv *a= a_, *b= b_; - return my_strnncoll(&my_charset_utf8mb4_bin, - (const uchar *)a->key.str, a->key.length, - (const uchar *)b->key.str, b->key.length); + int ret; + if (!(ret= my_strnncoll(&my_charset_utf8mb4_bin, + (const uchar *)a->key.str, a->key.length, + (const uchar *)b->key.str, b->key.length))) + return json_norm_value_comp(&a->value, &b->value); + return ret; }