From c10d4100bdf3ef89acc1e1dbe9cc7c21f4e11efd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 28 Dec 2025 10:09:00 -0800 Subject: [PATCH 1/2] perf: improve performance of string replace (17-32% faster) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a reusable String buffer instead of allocating a new String for each row. This optimization achieves 17-32% performance improvement across different string types and sizes by avoiding per-row allocations. Benchmark results: | Benchmark | Array Size | String Length | Baseline (µs) | Optimized (µs) | Improvement | |---------------|------------|---------------|---------------|----------------|-------------| | string_view | 1024 | 32 | 32.53 | 22.32 | 31.4% faster| | string | 1024 | 32 | 31.89 | 21.49 | 32.6% faster| | large_string | 1024 | 32 | 31.75 | 22.01 | 30.7% faster| | string_view | 1024 | 128 | 49.51 | 36.11 | 27.1% faster| | string | 1024 | 128 | 48.91 | 34.90 | 28.6% faster| | large_string | 1024 | 128 | 49.78 | 35.42 | 28.8% faster| | string_view | 4096 | 32 | 133.67 | 95.93 | 28.2% faster| | string | 4096 | 32 | 131.48 | 91.73 | 30.2% faster| | large_string | 4096 | 32 | 129.61 | 92.82 | 28.4% faster| | string_view | 4096 | 128 | 191.50 | 153.74 | 19.7% faster| | string | 4096 | 128 | 185.27 | 149.37 | 19.4% faster| | large_string | 4096 | 128 | 187.82 | 154.32 | 17.8% faster| --- datafusion/functions/Cargo.toml | 5 + datafusion/functions/benches/replace.rs | 165 +++++++++++++++++++++ datafusion/functions/src/string/replace.rs | 69 +++++++-- 3 files changed, 224 insertions(+), 15 deletions(-) create mode 100644 datafusion/functions/benches/replace.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 765f5d865a60e..c8584612874bf 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -210,6 +210,11 @@ harness = false name = "repeat" required-features = ["string_expressions"] +[[bench]] 
+harness = false +name = "replace" +required-features = ["string_expressions"] + [[bench]] harness = false name = "random" diff --git a/datafusion/functions/benches/replace.rs b/datafusion/functions/benches/replace.rs new file mode 100644 index 0000000000000..c974cd8a9baec --- /dev/null +++ b/datafusion/functions/benches/replace.rs @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use arrow::array::OffsetSizeTrait; +use arrow::datatypes::{DataType, Field}; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; +use criterion::{Criterion, SamplingMode, criterion_group, criterion_main}; +use datafusion_common::DataFusionError; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; +use datafusion_functions::string; +use std::hint::black_box; +use std::sync::Arc; +use std::time::Duration; + +fn create_args( + size: usize, + str_len: usize, + force_view_types: bool, +) -> Vec { + if force_view_types { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + let from_array = Arc::new(create_string_view_array_with_len(size, 0.1, 3, false)); + let to_array = Arc::new(create_string_view_array_with_len(size, 0.1, 5, false)); + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(from_array), + ColumnarValue::Array(to_array), + ] + } else { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + let from_array = Arc::new(create_string_array_with_len::(size, 0.1, 3)); + let to_array = Arc::new(create_string_array_with_len::(size, 0.1, 5)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(from_array), + ColumnarValue::Array(to_array), + ] + } +} + +fn invoke_replace_with_args( + args: Vec, + number_rows: usize, +) -> Result { + let arg_fields = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); + + string::replace().invoke_with_args(ScalarFunctionArgs { + args, + arg_fields, + number_rows, + return_field: Field::new("f", DataType::Utf8, true).into(), + config_options: Arc::clone(&config_options), + }) +} + +fn criterion_benchmark(c: &mut Criterion) { + for size in [1024, 4096] { + let 
mut group = c.benchmark_group(format!("replace size={size}")); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + group.measurement_time(Duration::from_secs(10)); + + // Small strings + let str_len = 32; + let args = create_args::(size, str_len, true); + group.bench_function( + format!("replace_string_view [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + let args = create_args::(size, str_len, false); + group.bench_function( + format!("replace_string [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + let args = create_args::(size, str_len, false); + group.bench_function( + format!("replace_large_string [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + // Larger strings + let str_len = 128; + let args = create_args::(size, str_len, true); + group.bench_function( + format!("replace_string_view [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + let args = create_args::(size, str_len, false); + group.bench_function( + format!("replace_string [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + let args = create_args::(size, str_len, false); + group.bench_function( + format!("replace_large_string [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + group.finish(); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git 
a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index a976ca7b9139d..004e04befdcc8 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -18,7 +18,7 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; +use arrow::array::{ArrayRef, GenericStringBuilder, OffsetSizeTrait}; use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -165,17 +165,25 @@ fn replace_view(args: &[ArrayRef]) -> Result { let from_array = as_string_view_array(&args[1])?; let to_array = as_string_view_array(&args[2])?; - let result = string_array + let mut builder = GenericStringBuilder::::new(); + let mut buffer = String::new(); + + for ((string, from), to) in string_array .iter() .zip(from_array.iter()) .zip(to_array.iter()) - .map(|((string, from), to)| match (string, from, to) { - (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)), - _ => None, - }) - .collect::(); + { + match (string, from, to) { + (Some(string), Some(from), Some(to)) => { + buffer.clear(); + replace_into_string(&mut buffer, string, from, to); + builder.append_value(&buffer); + } + _ => builder.append_null(), + } + } - Ok(Arc::new(result) as ArrayRef) + Ok(Arc::new(builder.finish()) as ArrayRef) } /// Replaces all occurrences in string of substring from with substring to. 
@@ -185,17 +193,48 @@ fn replace(args: &[ArrayRef]) -> Result { let from_array = as_generic_string_array::(&args[1])?; let to_array = as_generic_string_array::(&args[2])?; - let result = string_array + let mut builder = GenericStringBuilder::::new(); + let mut buffer = String::new(); + + for ((string, from), to) in string_array .iter() .zip(from_array.iter()) .zip(to_array.iter()) - .map(|((string, from), to)| match (string, from, to) { - (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)), - _ => None, - }) - .collect::>(); + { + match (string, from, to) { + (Some(string), Some(from), Some(to)) => { + buffer.clear(); + replace_into_string(&mut buffer, string, from, to); + builder.append_value(&buffer); + } + _ => builder.append_null(), + } + } + + Ok(Arc::new(builder.finish()) as ArrayRef) +} - Ok(Arc::new(result) as ArrayRef) +/// Helper function to perform string replacement into a reusable String buffer +#[inline] +fn replace_into_string(buffer: &mut String, string: &str, from: &str, to: &str) { + if from.is_empty() { + // When from is empty, insert 'to' at the beginning, between each character, and at the end + // This matches the behavior of str::replace() + buffer.push_str(to); + for ch in string.chars() { + buffer.push(ch); + buffer.push_str(to); + } + return; + } + + let mut last_end = 0; + for (start, _part) in string.match_indices(from) { + buffer.push_str(&string[last_end..start]); + buffer.push_str(to); + last_end = start + from.len(); + } + buffer.push_str(&string[last_end..]); } #[cfg(test)] From 4f36125cf4b4d02164a2767926646d4d4b6eecb2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 28 Dec 2025 20:19:40 -0800 Subject: [PATCH 2/2] Add ASCII fast path optimization to replace function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a fast path for replacing single ASCII characters with another single ASCII character, matching Rust's str::replace() optimization. 
This enables vectorization and avoids UTF-8 boundary-checking overhead. Changes: - Added ASCII character detection in replace_into_string() - When both 'from' and 'to' are single ASCII bytes, use direct byte mapping - Updated benchmark to include single ASCII character replacement tests Optimization: - Fast path operates directly on bytes using a simple map operation - The compiler can vectorize the byte-wise replacement - Avoids the overhead of match_indices() pattern matching for this common case Benchmark Results (Single ASCII Character Replacement) against previous commit: - size=1024, str_len=32: 29.5 µs → 21.4 µs (27% faster) - size=1024, str_len=128: 73.9 µs → 23.4 µs (68% faster) - size=4096, str_len=32: 121.8 µs → 85.6 µs (30% faster) - size=4096, str_len=128: 316.9 µs → 83.8 µs (74% faster) The optimization yields exceptional improvements of 27-74%, with the benefit scaling dramatically with string length. For 128-character strings, we achieve a speedup of over 3x by enabling vectorization and eliminating pattern-matching overhead. This addresses reviewer feedback about capturing Rust's str::replace() optimization tricks for single ASCII character replacements. 
--- datafusion/functions/benches/replace.rs | 50 +++++++++++++++++----- datafusion/functions/src/string/replace.rs | 16 +++++++ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/datafusion/functions/benches/replace.rs b/datafusion/functions/benches/replace.rs index c974cd8a9baec..deadbfeb99a84 100644 --- a/datafusion/functions/benches/replace.rs +++ b/datafusion/functions/benches/replace.rs @@ -35,12 +35,17 @@ fn create_args( size: usize, str_len: usize, force_view_types: bool, + from_len: usize, + to_len: usize, ) -> Vec { if force_view_types { let string_array = Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); - let from_array = Arc::new(create_string_view_array_with_len(size, 0.1, 3, false)); - let to_array = Arc::new(create_string_view_array_with_len(size, 0.1, 5, false)); + let from_array = Arc::new(create_string_view_array_with_len( + size, 0.1, from_len, false, + )); + let to_array = + Arc::new(create_string_view_array_with_len(size, 0.1, to_len, false)); vec![ ColumnarValue::Array(string_array), ColumnarValue::Array(from_array), @@ -49,8 +54,8 @@ fn create_args( } else { let string_array = Arc::new(create_string_array_with_len::(size, 0.1, str_len)); - let from_array = Arc::new(create_string_array_with_len::(size, 0.1, 3)); - let to_array = Arc::new(create_string_array_with_len::(size, 0.1, 5)); + let from_array = Arc::new(create_string_array_with_len::(size, 0.1, from_len)); + let to_array = Arc::new(create_string_array_with_len::(size, 0.1, to_len)); vec![ ColumnarValue::Array(string_array), @@ -87,9 +92,21 @@ fn criterion_benchmark(c: &mut Criterion) { group.sample_size(10); group.measurement_time(Duration::from_secs(10)); - // Small strings + // ASCII single character replacement (fast path) let str_len = 32; - let args = create_args::(size, str_len, true); + let args = create_args::(size, str_len, false, 1, 1); + group.bench_function( + format!("replace_string_ascii_single [size={size}, str_len={str_len}]"), + |b| { 
+ b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + // Multi-character strings (general path) + let args = create_args::(size, str_len, true, 3, 5); group.bench_function( format!("replace_string_view [size={size}, str_len={str_len}]"), |b| { @@ -100,7 +117,7 @@ fn criterion_benchmark(c: &mut Criterion) { }, ); - let args = create_args::(size, str_len, false); + let args = create_args::(size, str_len, false, 3, 5); group.bench_function( format!("replace_string [size={size}, str_len={str_len}]"), |b| { @@ -111,7 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) { }, ); - let args = create_args::(size, str_len, false); + let args = create_args::(size, str_len, false, 3, 5); group.bench_function( format!("replace_large_string [size={size}, str_len={str_len}]"), |b| { @@ -124,7 +141,18 @@ fn criterion_benchmark(c: &mut Criterion) { // Larger strings let str_len = 128; - let args = create_args::(size, str_len, true); + let args = create_args::(size, str_len, false, 1, 1); + group.bench_function( + format!("replace_string_ascii_single [size={size}, str_len={str_len}]"), + |b| { + b.iter(|| { + let args_cloned = args.clone(); + black_box(invoke_replace_with_args(args_cloned, size)) + }) + }, + ); + + let args = create_args::(size, str_len, true, 3, 5); group.bench_function( format!("replace_string_view [size={size}, str_len={str_len}]"), |b| { @@ -135,7 +163,7 @@ fn criterion_benchmark(c: &mut Criterion) { }, ); - let args = create_args::(size, str_len, false); + let args = create_args::(size, str_len, false, 3, 5); group.bench_function( format!("replace_string [size={size}, str_len={str_len}]"), |b| { @@ -146,7 +174,7 @@ fn criterion_benchmark(c: &mut Criterion) { }, ); - let args = create_args::(size, str_len, false); + let args = create_args::(size, str_len, false, 3, 5); group.bench_function( format!("replace_large_string [size={size}, str_len={str_len}]"), |b| { diff --git 
a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 004e04befdcc8..165e0634a6b80 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -228,6 +228,22 @@ fn replace_into_string(buffer: &mut String, string: &str, from: &str, to: &str) return; } + // Fast path for replacing a single ASCII character with another single ASCII character + // This matches Rust's str::replace() optimization and enables vectorization + if let ([from_byte], [to_byte]) = (from.as_bytes(), to.as_bytes()) + && from_byte.is_ascii() + && to_byte.is_ascii() + { + // SAFETY: We're replacing ASCII with ASCII, which preserves UTF-8 validity + let replaced: Vec = string + .as_bytes() + .iter() + .map(|b| if *b == *from_byte { *to_byte } else { *b }) + .collect(); + buffer.push_str(unsafe { std::str::from_utf8_unchecked(&replaced) }); + return; + } + let mut last_end = 0; for (start, _part) in string.match_indices(from) { buffer.push_str(&string[last_end..start]);