From b4cc8d3deb85cd918b9fc0059d34542be444cd14 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 15 Mar 2026 15:33:06 +0530 Subject: [PATCH 1/4] HIVE-29433: ClassCastException in FilterLongColumnBetween.evaluate when vectorization is enabled: DecimalColumnVector cannot be cast to class LongColumnVector --- .../ql/exec/vector/VectorizationContext.java | 4 +++ .../clientpositive/cast_decimal_vectorized.q | 9 +++++ .../llap/cast_decimal_vectorized.q.out | 35 +++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/cast_decimal_vectorized.q create mode 100644 ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index e56cbb3cec73..54da6a537acc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1717,6 +1717,10 @@ private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws GenericUDF udf = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF(); Class udfClass = udf.getClass(); if (udf instanceof GenericUDFToDecimal) { + ExprNodeDesc child = exprNodeDesc.getChildren().get(0); + if (isDecimalFamily(child.getTypeString())) { + return checkExprNodeDescForDecimal64(child); + } return true; } // We have a class-level annotation that says whether the UDF's vectorization expressions diff --git a/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q b/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q new file mode 100644 index 000000000000..a551c308cf22 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q @@ -0,0 +1,9 @@ +CREATE TABLE test_stats0 (e decimal(38,10)) stored as orc; +insert into test_stats0 (e) values (0.0); + +set 
hive.vectorized.execution.enabled=false; +select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; + + +set hive.vectorized.execution.enabled=true; +select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; diff --git a/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out b/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out new file mode 100644 index 000000000000..f0f0d34db27d --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out @@ -0,0 +1,35 @@ +PREHOOK: query: CREATE TABLE test_stats0 (e decimal(38,10)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_stats0 +POSTHOOK: query: CREATE TABLE test_stats0 (e decimal(38,10)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_stats0 +PREHOOK: query: insert into test_stats0 (e) values (0.0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats0 +POSTHOOK: query: insert into test_stats0 (e) values (0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats0 +POSTHOOK: Lineage: test_stats0.e SCRIPT [] +PREHOOK: query: select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_stats0 where CAST(e as 
DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +0 From 78bcca6c723365672520d197f3f34723dcf04e19 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Tue, 14 Apr 2026 11:53:35 +0530 Subject: [PATCH 2/4] Update the qtest as per review comments --- .../clientpositive/cast_decimal_vectorized.q | 4 + .../llap/cast_decimal_vectorized.q.out | 271 ++++++++++++++++++ 2 files changed, 275 insertions(+) diff --git a/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q b/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q index a551c308cf22..46bc9f4b981c 100644 --- a/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q +++ b/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q @@ -6,4 +6,8 @@ select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AN set hive.vectorized.execution.enabled=true; +EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; + +EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; +select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out b/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out index f0f0d34db27d..e0f28ef829f0 100644 --- a/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out +++ b/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out @@ -24,6 +24,137 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_stats0 #### A masked pattern was here #### 0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_stats0 + filterExpr: CAST( e AS decimal(15,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:e:decimal(38,10), 1:ROW__ID:struct, 2:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 3:decimal(15,1), left 100, right 1000)(children: CastDecimalToDecimal(col 0:decimal(38,10)) -> 3:decimal(15,1)) + predicate: CAST( e AS decimal(15,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: e:decimal(38,10) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(15,1)] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 PREHOOK: type: QUERY PREHOOK: Input: default@test_stats0 @@ -33,3 +164,143 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_stats0 #### A masked pattern was here #### 0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_stats0 + filterExpr: CAST( e AS decimal(30,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:e:decimal(38,10), 
1:ROW__ID:struct, 2:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 3:decimal(30,1), left 100, right 1000)(children: CastDecimalToDecimal(col 0:decimal(38,10)) -> 3:decimal(30,1)) + predicate: CAST( e AS decimal(30,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: e:decimal(38,10) + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(30,1)] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats0 +#### A masked pattern was here #### +0 From 
e02bba3ecabf096b02733adb748cc65dd91210f0 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Tue, 14 Apr 2026 19:34:59 +0530 Subject: [PATCH 3/4] Add javadoc for checkExprNodeDescForDecimal64 --- .../hive/ql/exec/vector/VectorizationContext.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 54da6a537acc..894231e991e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1700,6 +1700,16 @@ private VectorExpression getIdentityExpression(List childExprList) return ve; } + /** + * Determines whether an expression and its children are compatible with the DECIMAL_64 vectorization execution path. + * The method evaluates the expression against known scenarios where DECIMAL_64 is not supported, + * returning false if any incompatibility is found. For UDFs, it handles specific implementations + * (e.g., GenericUDFToDecimal), checks for the @VectorizedExpressionsSupportDecimal64 annotation, + * and recursively verifies all child nodes. If all checks pass, it returns true. + * @param exprNodeDesc expression node to be evaluated. + * @return true if the expression and its children are DECIMAL_64 compatible, false otherwise. 
+ * @throws HiveException if inspecting the expression or any of its children fails + */ private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException { if (exprNodeDesc instanceof ExprNodeColumnDesc) { int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc); From 73f664cb1b98e413ab19872c80732019803fc38b Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Tue, 14 Apr 2026 19:48:40 +0530 Subject: [PATCH 4/4] Update qtest to include integer column for validation --- .../clientpositive/cast_decimal_vectorized.q | 17 +- .../llap/cast_decimal_vectorized.q.out | 306 ++++++++++++++++++ 2 files changed, 321 insertions(+), 2 deletions(-) diff --git a/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q b/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q index 46bc9f4b981c..f6a1cbdafee4 100644 --- a/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q +++ b/ql/src/test/queries/clientpositive/cast_decimal_vectorized.q @@ -4,10 +4,23 @@ insert into test_stats0 (e) values (0.0); set hive.vectorized.execution.enabled=false; select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; - set hive.vectorized.execution.enabled=true; EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; select count(*) from test_stats0 where CAST(e as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; -select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; \ No newline at end of file +select count(*) from test_stats0 where CAST(e as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; + + +CREATE TABLE test_stats1 (int_col INT) stored as orc; +insert into test_stats1 (int_col) values (0); + +set hive.vectorized.execution.enabled=false; +select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; + +set 
hive.vectorized.execution.enabled=true; +EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; +select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0; + +EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats1 where CAST(int_col as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; +select count(*) from test_stats1 where CAST(int_col as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out b/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out index e0f28ef829f0..543cbd8e559b 100644 --- a/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out +++ b/ql/src/test/results/clientpositive/llap/cast_decimal_vectorized.q.out @@ -304,3 +304,309 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_stats0 #### A masked pattern was here #### 0 +PREHOOK: query: CREATE TABLE test_stats1 (int_col INT) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_stats1 +POSTHOOK: query: CREATE TABLE test_stats1 (int_col INT) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_stats1 +PREHOOK: query: insert into test_stats1 (int_col) values (0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats1 +POSTHOOK: query: insert into test_stats1 (int_col) values (0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats1 +POSTHOOK: Lineage: test_stats1.int_col SCRIPT [] +PREHOOK: query: select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_stats1 where CAST(int_col as 
DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_stats1 + filterExpr: CAST( int_col AS decimal(15,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col:int, 1:ROW__ID:struct, 2:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimal64ColumnBetween(col 3:decimal(15,1)/DECIMAL_64, decimal64LeftVal 1000, decimalLeftVal 1000, decimal64RightVal 10000, decimalRightVal 10000)(children: CastLongToDecimal64(col 0:int) -> 3:decimal(15,1)/DECIMAL_64) + predicate: CAST( int_col AS decimal(15,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 
1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col:int + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(15,1)/DECIMAL_64] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + 
scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_stats1 where CAST(int_col as DECIMAL(15,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats1 where CAST(int_col as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL select count(*) from test_stats1 where CAST(int_col as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 
is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_stats1 + filterExpr: CAST( int_col AS decimal(30,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col:int, 1:ROW__ID:struct, 2:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColumnBetween(col 3:decimal(30,1), left 100, right 1000)(children: CastLongToDecimal(col 0:int) -> 3:decimal(30,1)) + predicate: CAST( int_col AS decimal(30,1)) BETWEEN 100 AND 1000 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col:int + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(30,1)] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from test_stats1 where CAST(int_col as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_stats1 where CAST(int_col as DECIMAL(30,1)) BETWEEN 100.0 AND 1000.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_stats1 +#### A masked pattern was here #### +0