From ad7621914b24bf8cfaa3b67abb9aa41c470ba6d1 Mon Sep 17 00:00:00 2001 From: Jim Newsome Date: Wed, 5 May 2021 18:16:45 -0500 Subject: [PATCH 1/3] Allocate buffers more efficiently * Allocate buffer with desired capacity instead of appending one byte at a time, which can result in repeated re-allocations. * Don't initialize buffer to zeros when we're going to overwrite the contents anyway. Experimentally, these changes make a very substantial performance improvement in Debug builds, though little noticeable difference in Release builds. e.g. in my application that uses this crate for logging, these changes bring total run time down from 8.64s to 2.45s in Debug builds. --- src/lib.rs | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 97d99de..eaea07d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,14 +20,21 @@ pub unsafe fn vsprintf(format: *const c_char, }) } +/// Return a buffer with the given length, filled with arbitrary data. +fn uninitd_buffer_of_len(len: usize) -> Vec { + let mut buffer = Vec::with_capacity(len); + // SAFETY: Any bit pattern is a valid u8. + unsafe { buffer.set_len(len) }; + buffer +} + /// Prints a format string into a list of raw bytes that form /// a null-terminated C string. pub unsafe fn vsprintf_raw(format: *const c_char, va_list: *mut V) -> Result> { let list_ptr = va_list as *mut c_void; - let mut buffer = Vec::new(); - buffer.extend([0u8; INITIAL_BUFFER_SIZE].iter().cloned()); + let mut buffer = uninitd_buffer_of_len(INITIAL_BUFFER_SIZE); loop { let character_count = vsnprintf_wrapper( @@ -45,20 +52,19 @@ pub unsafe fn vsprintf_raw(format: *const c_char, assert!(character_count >= 0); let character_count = character_count as usize; - let current_max = buffer.len() - 1; - // Check if we had enough room in the buffer to fit everything. - if character_count > current_max { - let extra_space_required = character_count - current_max; - + // Note that character_count doesn't include the NULL byte, but + // neither do we need to reserve space for it since we'll end up + // dropping it anyway. (vsnprintf will still print as much as it can, + // even if there isn't room for the NULL byte). + if character_count > buffer.len() { // Reserve enough space and try again. - buffer.extend(repeat(0).take(extra_space_required as usize)); + buffer = uninitd_buffer_of_len(character_count); continue; } else { // We fit everything into the buffer. // Truncate the buffer up until the null terminator. - buffer = buffer.into_iter() - .take_while(|&b| b != 0) - .collect(); + buffer.truncate(character_count); + buffer.shrink_to_fit(); break; } } From 1269b19458f47d53d1db580750abf004b8848b2f Mon Sep 17 00:00:00 2001 From: Jim Newsome Date: Mon, 14 Jun 2021 14:53:08 -0500 Subject: [PATCH 2/3] Ensure space for NULL byte Despite that we ultimately want to drop the NULL byte, we need to include space for it in the buffer provided to libc's vsnprintf; otherwise it may replace the last byte of the string with a NULL byte. This also simplifies things by providing a zero-length buffer for the first call to vsnprintf, and then allocating exactly the required space. This gives only one code path to maintain (as opposed to 2-3 before depending if the initial buffer was less than, equal to, or greater than the initial size). While this means paying to format the string twice, it lets us drop `shrink_to_fit` of the final string, which can be a relatively expensive call to the allocator. Fixes https://github.com/shadow/shadow/issues/1432 --- build.rs | 5 +--- src/lib.rs | 79 +++++++++++++++++++++--------------------------------- 2 files changed, 32 insertions(+), 52 deletions(-) diff --git a/build.rs b/build.rs index c32e4e9..ad740a4 100644 --- a/build.rs +++ b/build.rs @@ -1,8 +1,5 @@ fn main() { println!("cargo:rerun-if-changed=src/lib.c"); - cc::Build::new() - .file("src/lib.c") - .compile("libvsprintf.a"); + cc::Build::new().file("src/lib.c").compile("libvsprintf.a"); } - diff --git a/src/lib.rs b/src/lib.rs index eaea07d..dc0e001 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,21 +3,17 @@ extern crate libc; use libc::size_t; -use std::iter::repeat; +use std::convert::TryFrom; use std::io::Error; use std::os::raw::*; -const INITIAL_BUFFER_SIZE: usize = 512; - /// The result of a vsprintf call. pub type Result = ::std::result::Result; /// Prints a format string into a Rust string. -pub unsafe fn vsprintf(format: *const c_char, - va_list: *mut V) -> Result { - vsprintf_raw(format, va_list).map(|bytes| { - String::from_utf8(bytes).expect("vsprintf result is not valid utf-8") - }) +pub unsafe fn vsprintf(format: *const c_char, va_list: *mut V) -> Result { + vsprintf_raw(format, va_list) + .map(|bytes| String::from_utf8(bytes).expect("vsprintf result is not valid utf-8")) } /// Return a buffer with the given length, filled with arbitrary data. @@ -30,52 +26,39 @@ fn uninitd_buffer_of_len(len: usize) -> Vec { /// Prints a format string into a list of raw bytes that form /// a null-terminated C string. -pub unsafe fn vsprintf_raw(format: *const c_char, - va_list: *mut V) -> Result> { - let list_ptr = va_list as *mut c_void; +pub unsafe fn vsprintf_raw(format: *const c_char, va_list: *mut V) -> Result> { + let list_ptr = va_list as *mut c_void; - let mut buffer = uninitd_buffer_of_len(INITIAL_BUFFER_SIZE); + let character_count = vsnprintf_wrapper(std::ptr::null_mut(), 0, format, list_ptr); - loop { - let character_count = vsnprintf_wrapper( - buffer.as_mut_ptr(), buffer.len(), format, list_ptr - ); + // Check for errors. + if character_count == -1 { + // C does not require vsprintf to set errno, but POSIX does. + // + // Default handling will just generate an 'unknown' IO error + // if no errno is set. + return Err(Error::last_os_error()); + } - // Check for errors. - if character_count == -1 { - // C does not require vsprintf to set errno, but POSIX does. - // - // Default handling will just generate an 'unknown' IO error - // if no errno is set. - return Err(Error::last_os_error()); - } else { - assert!(character_count >= 0); - let character_count = character_count as usize; + // Include space for NULL byte. + let mut buffer = uninitd_buffer_of_len(usize::try_from(character_count).unwrap() + 1); - // Check if we had enough room in the buffer to fit everything. - // Note that character_count doesn't include the NULL byte, but - // neither do we need to reserve space for it since we'll end up - // dropping it anyway. (vsnprintf will still print as much as it can, - // even if there isn't room for the NULL byte). - if character_count > buffer.len() { - // Reserve enough space and try again. - buffer = uninitd_buffer_of_len(character_count); - continue; - } else { // We fit everything into the buffer. - // Truncate the buffer up until the null terminator. - buffer.truncate(character_count); - buffer.shrink_to_fit(); - break; - } - } - } + let final_character_count = + vsnprintf_wrapper(buffer.as_mut_ptr(), buffer.len(), format, list_ptr); + + assert_eq!(final_character_count, character_count); + + // Drop null byte + assert_eq!(buffer.pop(), Some(0u8)); Ok(buffer) } -extern { - fn vsnprintf_wrapper(buffer: *mut u8, - size: size_t, - format: *const c_char, - va_list: *mut c_void) -> libc::c_int; +extern "C" { + fn vsnprintf_wrapper( + buffer: *mut u8, + size: size_t, + format: *const c_char, + va_list: *mut c_void, + ) -> libc::c_int; } From fa9a307e3043a972501b3157323ed8a9973ad45a Mon Sep 17 00:00:00 2001 From: Jim Newsome Date: Tue, 15 Jun 2021 15:47:18 -0500 Subject: [PATCH 3/3] Be robust to changing buffer length requirement vsnprintf *should* return the exact number of characters that will be printed with the given arguments. In Shadow, I've seen at least one instance of it wanting more space on the second call, though. This could be a bug in Shadow (maybe another thread mutating one of the arguments in between calls, but is difficult to debug. Probably better to be robust here and just reallocate if needed than to crash. --- src/lib.rs | 55 ++++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dc0e001..c02c179 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,6 @@ extern crate libc; use libc::size_t; -use std::convert::TryFrom; use std::io::Error; use std::os::raw::*; @@ -16,41 +15,39 @@ pub unsafe fn vsprintf(format: *const c_char, va_list: *mut V) -> Result Vec { - let mut buffer = Vec::with_capacity(len); - // SAFETY: Any bit pattern is a valid u8. - unsafe { buffer.set_len(len) }; - buffer -} - /// Prints a format string into a list of raw bytes that form /// a null-terminated C string. pub unsafe fn vsprintf_raw(format: *const c_char, va_list: *mut V) -> Result> { let list_ptr = va_list as *mut c_void; - let character_count = vsnprintf_wrapper(std::ptr::null_mut(), 0, format, list_ptr); - - // Check for errors. - if character_count == -1 { - // C does not require vsprintf to set errno, but POSIX does. - // - // Default handling will just generate an 'unknown' IO error - // if no errno is set. - return Err(Error::last_os_error()); + let mut buffer: Vec = Vec::new(); + loop { + let rv = vsnprintf_wrapper(buffer.as_mut_ptr(), buffer.len(), format, list_ptr); + + // Check for errors. + let character_count = if rv < 0 { + // C does not require vsprintf to set errno, but POSIX does. + // + // Default handling will just generate an 'unknown' IO error + // if no errno is set. + return Err(Error::last_os_error()); + } else { + rv as usize + }; + + if character_count >= buffer.len() { + let new_len = character_count + 1; + buffer.reserve_exact(new_len - buffer.len()); + // SAFETY: Any bit pattern is a valid u8, and we reserved the space. + buffer.set_len(new_len); + continue; + } + + // Drop NULL byte and any excess capacity. + buffer.truncate(character_count); + break; } - // Include space for NULL byte. - let mut buffer = uninitd_buffer_of_len(usize::try_from(character_count).unwrap() + 1); - - let final_character_count = - vsnprintf_wrapper(buffer.as_mut_ptr(), buffer.len(), format, list_ptr); - - assert_eq!(final_character_count, character_count); - - // Drop null byte - assert_eq!(buffer.pop(), Some(0u8)); - Ok(buffer) }