Benchmark and rework ScalarValue::to_array_of_size (rust/datafusion).

- Cargo.toml: registers a new `scalar` bench target (harness = false).
- benches/scalar.rs: new Criterion benchmark that times
  `to_array_of_size(100000)` on `ScalarValue::Int32(Some(100))` and asserts
  that the resulting array has a null count of 0.
- src/scalar.rs: the Float*/Int*/UInt*/Time*/Utf8/LargeUtf8/Date32 arms stop
  building an intermediate `vec![..; size]`. A `Some` value is now fed through
  `from_iter_values(repeat(value).take(size))`; a `None` is produced by
  collecting `size` repeated `None`s into the target array type.
  The removed TimeMicrosecond/TimeNanosecond arms built from `vec![*e]`
  without using `size`; the added arms take `size` like every other arm.

diff --git a/rust/datafusion/Cargo.toml b/rust/datafusion/Cargo.toml
index 8d2816f3e07..0e802c0b1e5 100644
--- a/rust/datafusion/Cargo.toml
+++ b/rust/datafusion/Cargo.toml
@@ -88,3 +88,7 @@ harness = false
 [[bench]]
 name = "filter_query_sql"
 harness = false
+
+[[bench]]
+name = "scalar"
+harness = false
diff --git a/rust/datafusion/benches/scalar.rs b/rust/datafusion/benches/scalar.rs
new file mode 100644
index 00000000000..30f21a964d5
--- /dev/null
+++ b/rust/datafusion/benches/scalar.rs
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion::scalar::ScalarValue;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("to_array_of_size 100000", |b| {
+        let scalar = ScalarValue::Int32(Some(100));
+
+        b.iter(|| assert_eq!(scalar.to_array_of_size(100000).null_count(), 0))
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/rust/datafusion/src/scalar.rs b/rust/datafusion/src/scalar.rs
index 64e5bcc22b5..8623edc6cac 100644
--- a/rust/datafusion/src/scalar.rs
+++ b/rust/datafusion/src/scalar.rs
@@ -17,7 +17,7 @@
 
 //! This module provides ScalarValue, an enum that can be used for storage of single elements
 
-use std::{convert::TryFrom, fmt, sync::Arc};
+use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
 
 use arrow::array::{
     Int16Builder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
@@ -205,28 +205,104 @@ impl ScalarValue {
             ScalarValue::Boolean(e) => {
                 Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
             }
-            ScalarValue::Float64(e) => {
-                Arc::new(Float64Array::from(vec![*e; size])) as ArrayRef
-            }
-            ScalarValue::Float32(e) => Arc::new(Float32Array::from(vec![*e; size])),
-            ScalarValue::Int8(e) => Arc::new(Int8Array::from(vec![*e; size])),
-            ScalarValue::Int16(e) => Arc::new(Int16Array::from(vec![*e; size])),
-            ScalarValue::Int32(e) => Arc::new(Int32Array::from(vec![*e; size])),
-            ScalarValue::Int64(e) => Arc::new(Int64Array::from(vec![*e; size])),
-            ScalarValue::UInt8(e) => Arc::new(UInt8Array::from(vec![*e; size])),
-            ScalarValue::UInt16(e) => Arc::new(UInt16Array::from(vec![*e; size])),
-            ScalarValue::UInt32(e) => Arc::new(UInt32Array::from(vec![*e; size])),
-            ScalarValue::UInt64(e) => Arc::new(UInt64Array::from(vec![*e; size])),
-            ScalarValue::TimeMicrosecond(e) => {
-                Arc::new(TimestampMicrosecondArray::from(vec![*e]))
-            }
-            ScalarValue::TimeNanosecond(e) => {
-                Arc::new(TimestampNanosecondArray::from_opt_vec(vec![*e], None))
-            }
-            ScalarValue::Utf8(e) => Arc::new(StringArray::from(vec![e.as_deref(); size])),
-            ScalarValue::LargeUtf8(e) => {
-                Arc::new(LargeStringArray::from(vec![e.as_deref(); size]))
-            }
+            ScalarValue::Float64(e) => match e {
+                Some(value) => {
+                    Arc::new(Float64Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Float64Array>()),
+            },
+            ScalarValue::Float32(e) => match e {
+                Some(value) => {
+                    Arc::new(Float32Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Float32Array>()),
+            },
+            ScalarValue::Int8(e) => match e {
+                Some(value) => {
+                    Arc::new(Int8Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Int8Array>()),
+            },
+            ScalarValue::Int16(e) => match e {
+                Some(value) => {
+                    Arc::new(Int16Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Int16Array>()),
+            },
+            ScalarValue::Int32(e) => match e {
+                Some(value) => {
+                    Arc::new(Int32Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Int32Array>()),
+            },
+            ScalarValue::Int64(e) => match e {
+                Some(value) => {
+                    Arc::new(Int64Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Int64Array>()),
+            },
+            ScalarValue::UInt8(e) => match e {
+                Some(value) => {
+                    Arc::new(UInt8Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<UInt8Array>()),
+            },
+            ScalarValue::UInt16(e) => match e {
+                Some(value) => {
+                    Arc::new(UInt16Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<UInt16Array>()),
+            },
+            ScalarValue::UInt32(e) => match e {
+                Some(value) => {
+                    Arc::new(UInt32Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<UInt32Array>()),
+            },
+            ScalarValue::UInt64(e) => match e {
+                Some(value) => {
+                    Arc::new(UInt64Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<UInt64Array>()),
+            },
+            ScalarValue::TimeMicrosecond(e) => match e {
+                Some(value) => Arc::new(TimestampMicrosecondArray::from_iter_values(
+                    repeat(*value).take(size),
+                )),
+                None => Arc::new(
+                    repeat(None)
+                        .take(size)
+                        .collect::<TimestampMicrosecondArray>(),
+                ),
+            },
+            ScalarValue::TimeNanosecond(e) => match e {
+                Some(value) => Arc::new(TimestampNanosecondArray::from_iter_values(
+                    repeat(*value).take(size),
+                )),
+                None => Arc::new(
+                    repeat(None)
+                        .take(size)
+                        .collect::<TimestampNanosecondArray>(),
+                ),
+            },
+            ScalarValue::Utf8(e) => match e {
+                Some(value) => {
+                    Arc::new(StringArray::from_iter_values(repeat(value).take(size)))
+                }
+                None => {
+                    Arc::new(repeat(None::<&str>).take(size).collect::<StringArray>())
+                }
+            },
+            ScalarValue::LargeUtf8(e) => match e {
+                Some(value) => {
+                    Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size)))
+                }
+                None => Arc::new(
+                    repeat(None::<&str>)
+                        .take(size)
+                        .collect::<LargeStringArray>(),
+                ),
+            },
             ScalarValue::List(values, data_type) => Arc::new(match data_type {
                 DataType::Int8 => build_list!(Int8Builder, Int8, values, size),
                 DataType::Int16 => build_list!(Int16Builder, Int16, values, size),
@@ -238,7 +314,12 @@ impl ScalarValue {
                 DataType::UInt64 => build_list!(UInt64Builder, UInt64, values, size),
                 _ => panic!("Unexpected DataType for list"),
             }),
-            ScalarValue::Date32(e) => Arc::new(Date32Array::from(vec![*e; size])),
+            ScalarValue::Date32(e) => match e {
+                Some(value) => {
+                    Arc::new(Date32Array::from_iter_values(repeat(*value).take(size)))
+                }
+                None => Arc::new(repeat(None).take(size).collect::<Date32Array>()),
+            },
         }
     }
 