diff --git a/CHANGELOG.md b/CHANGELOG.md index 269385e6..75cdc2cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Change Log +## 0.27.1 - 2025-12-18 + +- Performance improvement: Skipped UTF-8 validation for map keys during + deserialization. This significantly speeds up full record decoding by + treating keys as raw bytes when matching against struct fields. +- Performance improvement: Optimized tree traversal by reducing bounds checks + during node reading. + ## 0.27.0 - 2025-11-28 This release includes significant API changes. See [UPGRADING.md](UPGRADING.md) diff --git a/src/decoder.rs b/src/decoder.rs index 99657664..841adf5c 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -446,19 +446,26 @@ impl<'de> Decoder<'de> { } } - /// Reads a string directly, following pointers if needed. - pub(crate) fn read_string(&mut self) -> DecodeResult<&'de str> { + /// Reads a string's bytes directly, following pointers if needed. + /// Does NOT validate UTF-8. + pub(crate) fn read_str_as_bytes(&mut self) -> DecodeResult<&'de [u8]> { let (size, type_num) = self.size_and_type(); if type_num == TYPE_POINTER { // Pointer let new_ptr = self.decode_pointer(size); let saved_ptr = self.current_ptr; self.current_ptr = new_ptr; - let result = self.read_string(); + let result = self.read_str_as_bytes(); self.current_ptr = saved_ptr; result } else if type_num == TYPE_STRING { - self.decode_string(size) + let new_offset = self.current_ptr + size; + if new_offset > self.buf.len() { + return Err(self.invalid_db_error("string length exceeds buffer")); + } + let bytes = &self.buf[self.current_ptr..new_offset]; + self.current_ptr = new_offset; + Ok(bytes) } else { Err(self.invalid_db_error(&format!("expected string, got type {type_num}"))) } @@ -595,10 +602,23 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> { visitor.visit_enum(EnumAccessor { de: self }) } + fn deserialize_identifier(self, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + let (_, type_num) = self.peek_type()?; + if type_num == TYPE_STRING { + let bytes = self.read_str_as_bytes()?; + visitor.visit_borrowed_bytes(bytes) + } else { + self.decode_any(visitor) + } + } + forward_to_deserialize_any! { bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string bytes byte_buf unit unit_struct newtype_struct seq tuple - tuple_struct map struct identifier + tuple_struct map struct } } diff --git a/src/reader.rs b/src/reader.rs index d255eb99..e00456f5 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -450,9 +450,8 @@ impl<'de, S: AsRef<[u8]>> Reader { let val = match self.metadata.record_size { 24 => { let offset = base_offset + index * 3; - (buf[offset] as usize) << 16 - | (buf[offset + 1] as usize) << 8 - | buf[offset + 2] as usize + let bytes = &buf[offset..offset + 3]; + (bytes[0] as usize) << 16 | (bytes[1] as usize) << 8 | bytes[2] as usize } 28 => { let middle = if index != 0 { @@ -461,17 +460,19 @@ impl<'de, S: AsRef<[u8]>> Reader { (buf[base_offset + 3] & 0xF0) >> 4 }; let offset = base_offset + index * 4; + let bytes = &buf[offset..offset + 3]; (middle as usize) << 24 - | (buf[offset] as usize) << 16 - | (buf[offset + 1] as usize) << 8 - | buf[offset + 2] as usize + | (bytes[0] as usize) << 16 + | (bytes[1] as usize) << 8 + | bytes[2] as usize } 32 => { let offset = base_offset + index * 4; - (buf[offset] as usize) << 24 - | (buf[offset + 1] as usize) << 16 - | (buf[offset + 2] as usize) << 8 - | buf[offset + 3] as usize + let bytes = &buf[offset..offset + 4]; + (bytes[0] as usize) << 24 + | (bytes[1] as usize) << 16 + | (bytes[2] as usize) << 8 + | bytes[3] as usize } s => { return Err(MaxMindDbError::invalid_database(format!( diff --git a/src/reader_test.rs b/src/reader_test.rs index f16785b1..2bc78243 100644 --- a/src/reader_test.rs +++ b/src/reader_test.rs @@ -1212,6 +1212,7 @@ fn test_ignored_any() { let _ = env_logger::try_init(); // Struct that only reads some fields, ignoring others via IgnoredAny + #[allow(dead_code)] #[derive(Deserialize, Debug)] struct PartialRead { utf8_string: String, diff --git a/src/result.rs b/src/result.rs index d3ccaaa6..5073b8df 100644 --- a/src/result.rs +++ b/src/result.rs @@ -252,9 +252,10 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { let size = decoder.consume_map_header().map_err(with_path)?; let mut found = false; + let key_bytes = key.as_bytes(); for _ in 0..size { - let k = decoder.read_string().map_err(with_path)?; - if k == key { + let k = decoder.read_str_as_bytes().map_err(with_path)?; + if k == key_bytes { found = true; break; } else {