diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index 4ca363f75e71..182aa8613739 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -272,6 +272,7 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { lcv.child.isNull[i] = true; + lcv.child.noNulls = false; } else { ((LongColumnVector) lcv.child).vector[i] = ((List) valueList).get(i); } @@ -287,6 +288,7 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { lcv.child.isNull[i] = true; + lcv.child.noNulls = false; } else { ((LongColumnVector) lcv.child).vector[i] = ((List) valueList).get(i); } @@ -297,6 +299,7 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { lcv.child.isNull[i] = true; + lcv.child.noNulls = false; } else { ((DoubleColumnVector) lcv.child).vector[i] = ((List) valueList).get(i); } @@ -313,6 +316,7 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor if (src == null) { ((BytesColumnVector) lcv.child).setRef(i, src, 0, 0); lcv.child.isNull[i] = true; + lcv.child.noNulls = false; } else { ((BytesColumnVector) lcv.child).setRef(i, src, 0, src.length); } @@ -323,6 +327,7 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { lcv.child.isNull[i] = true; + lcv.child.noNulls = false; } else { ((DoubleColumnVector) lcv.child).vector[i] = ((List) valueList).get(i); } @@ -337,6 +342,7 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { lcv.child.isNull[i] = true; + lcv.child.noNulls = false; } else { ((DecimalColumnVector) lcv.child).vector[i].set(((List) valueList).get(i), scale); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedMapColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedMapColumnReader.java index cbfa0a0bbd4a..c61ec037932c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedMapColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedMapColumnReader.java @@ -65,5 +65,6 @@ public void readBatch(int total, ColumnVector column, TypeInfo columnType) throw mapColumnVector.childCount = keyListColumnVector.childCount; mapColumnVector.isRepeating = keyListColumnVector.isRepeating && valueListColumnVector.isRepeating; + mapColumnVector.noNulls = keyListColumnVector.noNulls && valueListColumnVector.noNulls; } } diff --git a/ql/src/test/queries/clientpositive/parquet_array_with_null_vectorization.q b/ql/src/test/queries/clientpositive/parquet_array_with_null_vectorization.q new file mode 100644 index 000000000000..f501cbbf01c7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_with_null_vectorization.q @@ -0,0 +1,67 @@ +-- SORT_QUERY_RESULTS +SET hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.reduce.enabled=true; +SET hive.fetch.task.conversion=none; + +CREATE TABLE test_parquet_array_nulls_bool ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET; + +INSERT INTO test_parquet_array_nulls_bool VALUES + (1, array(CAST(NULL AS BOOLEAN), CAST(NULL AS BOOLEAN))), + (2, if(1=0, array(true, false), null)), + (3, array(true, CAST(NULL AS BOOLEAN))), + (4, array(true, false)); + +SELECT * FROM test_parquet_array_nulls_bool; + +CREATE TABLE test_parquet_array_nulls_double ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET; + +INSERT INTO test_parquet_array_nulls_double +SELECT 1, array(CAST(NULL AS DOUBLE), CAST(NULL AS DOUBLE)) +UNION ALL +SELECT 2, CAST(NULL AS ARRAY) +UNION ALL +SELECT 3, array(CAST(3.3 AS DOUBLE), CAST(NULL AS DOUBLE)) +UNION ALL +SELECT 4, array(CAST(4.4 AS DOUBLE), CAST(5.5 AS DOUBLE)); + +SELECT * FROM test_parquet_array_nulls_double; + +CREATE TABLE test_parquet_array_nulls_varchar ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET; + +INSERT INTO test_parquet_array_nulls_varchar +SELECT 1, array(CAST(NULL AS VARCHAR(20)), CAST(NULL AS VARCHAR(20))); +INSERT INTO test_parquet_array_nulls_varchar +SELECT 2, CAST(NULL AS ARRAY); + +SELECT * FROM test_parquet_array_nulls_varchar; + +CREATE TABLE test_parquet_array_nulls_float ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET; + +INSERT INTO test_parquet_array_nulls_float +SELECT 1, array(CAST(NULL AS FLOAT), CAST(NULL AS FLOAT)) +UNION ALL +SELECT 2, CAST(NULL AS ARRAY) +UNION ALL +SELECT 3, array(CAST(3.3 AS FLOAT), CAST(NULL AS FLOAT)) +UNION ALL +SELECT 4, array(CAST(4.4 AS FLOAT), CAST(5.5 AS FLOAT)); + +SELECT * FROM test_parquet_array_nulls_float; + +SET hive.vectorized.execution.enabled=false; +SELECT * FROM test_parquet_array_nulls_bool; +SELECT * FROM test_parquet_array_nulls_double; +SELECT * FROM test_parquet_array_nulls_varchar; +SELECT * FROM test_parquet_array_nulls_float; diff --git a/ql/src/test/results/clientpositive/llap/parquet_array_with_null_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_array_with_null_vectorization.q.out new file mode 100644 index 000000000000..ceeb7f99a1d6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_array_with_null_vectorization.q.out @@ -0,0 +1,238 @@ +PREHOOK: query: CREATE TABLE test_parquet_array_nulls_bool ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_parquet_array_nulls_bool +POSTHOOK: query: CREATE TABLE test_parquet_array_nulls_bool ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_parquet_array_nulls_bool +PREHOOK: query: INSERT INTO test_parquet_array_nulls_bool VALUES + (1, array(CAST(NULL AS BOOLEAN), CAST(NULL AS BOOLEAN))), + (2, if(1=0, array(true, false), null)), + (3, array(true, CAST(NULL AS BOOLEAN))), + (4, array(true, false)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_parquet_array_nulls_bool +POSTHOOK: query: INSERT INTO test_parquet_array_nulls_bool VALUES + (1, array(CAST(NULL AS BOOLEAN), CAST(NULL AS BOOLEAN))), + (2, if(1=0, array(true, false), null)), + (3, array(true, CAST(NULL AS BOOLEAN))), + (4, array(true, false)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_parquet_array_nulls_bool +POSTHOOK: Lineage: test_parquet_array_nulls_bool.arr_prim SCRIPT [] +POSTHOOK: Lineage: test_parquet_array_nulls_bool.id SCRIPT [] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_bool +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_bool +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_bool +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_bool +#### A masked pattern was here #### +1 [null,null] +2 NULL +3 [true,null] +4 [true,false] +PREHOOK: query: CREATE TABLE test_parquet_array_nulls_double ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_parquet_array_nulls_double +POSTHOOK: query: CREATE TABLE test_parquet_array_nulls_double ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_parquet_array_nulls_double +PREHOOK: query: INSERT INTO test_parquet_array_nulls_double +SELECT 1, array(CAST(NULL AS DOUBLE), CAST(NULL AS DOUBLE)) +UNION ALL +SELECT 2, CAST(NULL AS ARRAY) +UNION ALL +SELECT 3, array(CAST(3.3 AS DOUBLE), CAST(NULL AS DOUBLE)) +UNION ALL +SELECT 4, array(CAST(4.4 AS DOUBLE), CAST(5.5 AS DOUBLE)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_parquet_array_nulls_double +POSTHOOK: query: INSERT INTO test_parquet_array_nulls_double +SELECT 1, array(CAST(NULL AS DOUBLE), CAST(NULL AS DOUBLE)) +UNION ALL +SELECT 2, CAST(NULL AS ARRAY) +UNION ALL +SELECT 3, array(CAST(3.3 AS DOUBLE), CAST(NULL AS DOUBLE)) +UNION ALL +SELECT 4, array(CAST(4.4 AS DOUBLE), CAST(5.5 AS DOUBLE)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_parquet_array_nulls_double +POSTHOOK: Lineage: test_parquet_array_nulls_double.arr_prim SCRIPT [] +POSTHOOK: Lineage: test_parquet_array_nulls_double.id SCRIPT [] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_double +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_double +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_double +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_double +#### A masked pattern was here #### +1 [null,null] +2 NULL +3 [3.3,null] +4 [4.4,5.5] +PREHOOK: query: CREATE TABLE test_parquet_array_nulls_varchar ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_parquet_array_nulls_varchar +POSTHOOK: query: CREATE TABLE test_parquet_array_nulls_varchar ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_parquet_array_nulls_varchar +PREHOOK: query: INSERT INTO test_parquet_array_nulls_varchar +SELECT 1, array(CAST(NULL AS VARCHAR(20)), CAST(NULL AS VARCHAR(20))) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_parquet_array_nulls_varchar +POSTHOOK: query: INSERT INTO test_parquet_array_nulls_varchar +SELECT 1, array(CAST(NULL AS VARCHAR(20)), CAST(NULL AS VARCHAR(20))) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_parquet_array_nulls_varchar +POSTHOOK: Lineage: test_parquet_array_nulls_varchar.arr_prim EXPRESSION [] +POSTHOOK: Lineage: test_parquet_array_nulls_varchar.id SIMPLE [] +PREHOOK: query: INSERT INTO test_parquet_array_nulls_varchar +SELECT 2, CAST(NULL AS ARRAY) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_parquet_array_nulls_varchar +POSTHOOK: query: INSERT INTO test_parquet_array_nulls_varchar +SELECT 2, CAST(NULL AS ARRAY) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_parquet_array_nulls_varchar +POSTHOOK: Lineage: test_parquet_array_nulls_varchar.arr_prim SIMPLE [] +POSTHOOK: Lineage: test_parquet_array_nulls_varchar.id SIMPLE [] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_varchar +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_varchar +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_varchar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_varchar +#### A masked pattern was here #### +1 [null,null] +2 NULL +PREHOOK: query: CREATE TABLE test_parquet_array_nulls_float ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_parquet_array_nulls_float +POSTHOOK: query: CREATE TABLE test_parquet_array_nulls_float ( + id INT, + arr_prim ARRAY +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_parquet_array_nulls_float +PREHOOK: query: INSERT INTO test_parquet_array_nulls_float +SELECT 1, array(CAST(NULL AS FLOAT), CAST(NULL AS FLOAT)) +UNION ALL +SELECT 2, CAST(NULL AS ARRAY) +UNION ALL +SELECT 3, array(CAST(3.3 AS FLOAT), CAST(NULL AS FLOAT)) +UNION ALL +SELECT 4, array(CAST(4.4 AS FLOAT), CAST(5.5 AS FLOAT)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_parquet_array_nulls_float +POSTHOOK: query: INSERT INTO test_parquet_array_nulls_float +SELECT 1, array(CAST(NULL AS FLOAT), CAST(NULL AS FLOAT)) +UNION ALL +SELECT 2, CAST(NULL AS ARRAY) +UNION ALL +SELECT 3, array(CAST(3.3 AS FLOAT), CAST(NULL AS FLOAT)) +UNION ALL +SELECT 4, array(CAST(4.4 AS FLOAT), CAST(5.5 AS FLOAT)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_parquet_array_nulls_float +POSTHOOK: Lineage: test_parquet_array_nulls_float.arr_prim SCRIPT [] +POSTHOOK: Lineage: test_parquet_array_nulls_float.id SCRIPT [] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_float +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_float +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_float +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_float +#### A masked pattern was here #### +1 [null,null] +2 NULL +3 [3.3,null] +4 [4.4,5.5] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_bool +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_bool +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_bool +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_bool +#### A masked pattern was here #### +1 [null,null] +2 NULL +3 [true,null] +4 [true,false] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_double +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_double +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_double +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_double +#### A masked pattern was here #### +1 [null,null] +2 NULL +3 [3.3,null] +4 [4.4,5.5] +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_varchar +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_varchar +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_varchar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_varchar +#### A masked pattern was here #### +1 [null,null] +2 NULL +PREHOOK: query: SELECT * FROM test_parquet_array_nulls_float +PREHOOK: type: QUERY +PREHOOK: Input: default@test_parquet_array_nulls_float +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_parquet_array_nulls_float +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_parquet_array_nulls_float +#### A masked pattern was here #### +1 [null,null] +2 NULL +3 [3.3,null] +4 [4.4,5.5] diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_null_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_null_vectorization.q.out index d947099cd386..10ec145c68fd 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_map_null_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_map_null_vectorization.q.out @@ -42,8 +42,8 @@ POSTHOOK: query: select id, booleanMap from parquet_map_type_boolean POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_boolean #### A masked pattern was here #### -1 {true:false,false:true} -2 {false:false} +1 {true:null,false:true} +2 NULL PREHOOK: query: select id, booleanMap[true] from parquet_map_type_boolean group by id, booleanMap[true] PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type_boolean @@ -52,8 +52,8 @@ POSTHOOK: query: select id, booleanMap[true] from parquet_map_type_boolean group POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_boolean #### A masked pattern was here #### +1 NULL 2 NULL -1 false PREHOOK: query: CREATE TABLE parquet_map_type_string ( id int, stringMap map @@ -154,8 +154,8 @@ select id, intMap from parquet_map_type_int POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_int #### A masked pattern was here #### -1 {1:0,2:3} -2 {0:0} +1 {1:null,2:3} +2 NULL PREHOOK: query: select id, intMap[1] from parquet_map_type_int group by id, intMap[1] PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type_int @@ -164,7 +164,7 @@ POSTHOOK: query: select id, intMap[1] from parquet_map_type_int group by id, int POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_int #### A masked pattern was here #### -1 0 +1 NULL 2 NULL PREHOOK: query: CREATE TABLE parquet_map_type_double ( id int, @@ -198,7 +198,7 @@ POSTHOOK: query: select id, doubleMap from parquet_map_type_double POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_double #### A masked pattern was here #### -2 {0.0:0.0} +2 NULL PREHOOK: query: select id, doubleMap[1.0] from parquet_map_type_double group by id, doubleMap[1.0] PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type_double @@ -250,8 +250,8 @@ POSTHOOK: query: select id, decimalMap from parquet_map_type_decimal POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_decimal #### A masked pattern was here #### -1 {1:0,2:3} -2 {0:0} +1 {1:null,2:3} +2 NULL PREHOOK: query: select id, decimalMap[1.0] from parquet_map_type_decimal group by id, decimalMap[1.0] PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type_decimal @@ -260,7 +260,7 @@ POSTHOOK: query: select id, decimalMap[1.0] from parquet_map_type_decimal group POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_decimal #### A masked pattern was here #### -1 0 +1 NULL 2 NULL PREHOOK: query: CREATE TABLE parquet_map_type_date ( id int, @@ -304,8 +304,8 @@ POSTHOOK: query: select id, dateMap from parquet_map_type_date POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_date #### A masked pattern was here #### -1 {"2015-11-29":"1970-01-01","2016-11-29":"2017-11-29"} -2 {"1970-01-01":"1970-01-01"} +1 {"2015-11-29":null,"2016-11-29":"2017-11-29"} +2 NULL PREHOOK: query: select id, dateMap[CAST('2015-11-29' AS DATE)] from parquet_map_type_date group by id, dateMap[CAST('2015-11-29' AS DATE)] PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type_date @@ -314,5 +314,5 @@ POSTHOOK: query: select id, dateMap[CAST('2015-11-29' AS DATE)] from parquet_map POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type_date #### A masked pattern was here #### -1 1970-01-01 +1 NULL 2 NULL