From b18e13c51d5408c3c114a20c493c9c26c514fd4d Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 5 May 2026 15:16:01 +0200 Subject: [PATCH 1/4] refacto: cleanup v04 serializer --- .../src/trace_exporter/trace_serializer.rs | 15 ++++--- .../src/msgpack_encoder/v04/mod.rs | 6 +-- libdd-trace-utils/src/trace_utils.rs | 40 +++++++++---------- libdd-trace-utils/src/tracer_payload.rs | 7 +--- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs index 93b9909cdc..d0aaf98b35 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs @@ -17,7 +17,7 @@ use libdd_trace_utils::msgpack_encoder; use libdd_trace_utils::span::{v04::Span, TraceData}; use libdd_trace_utils::trace_utils::{self, TracerHeaderTags}; use libdd_trace_utils::tracer_metadata::TracerMetadata; -use libdd_trace_utils::tracer_payload; +use libdd_trace_utils::tracer_payload::{self, TraceEncoding}; /// Minimal capacity of fresh buffers allocated to encode traces, in bytes. const MIN_BUFFER_CAPACITY: usize = 1024; @@ -74,13 +74,16 @@ impl TraceSerializer { &self, traces: Vec>>, ) -> Result, TraceExporterError> { + let map_err = |e: anyhow::Error| { + TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) + }; match self.output_format { TraceExporterOutputFormat::V1 => Ok(tracer_payload::TraceChunks::V1(traces)), - format => { - let use_v05_format = matches!(format, TraceExporterOutputFormat::V05); - trace_utils::collect_trace_chunks(traces, use_v05_format).map_err(|e| { - TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) - }) + TraceExporterOutputFormat::V04 => { + trace_utils::collect_trace_chunks(traces, TraceEncoding::V04).map_err(map_err) + } + TraceExporterOutputFormat::V05 => { + trace_utils::collect_trace_chunks(traces, TraceEncoding::V05).map_err(map_err) } } } diff --git a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs index 1c4e0ec3af..5eeffb34a7 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs @@ -123,8 +123,7 @@ pub fn to_vec_with_capacity]>>( capacity: u32, ) -> Vec { let mut buf = ByteBuf::with_capacity(capacity as usize); - #[allow(clippy::expect_used)] - to_writer(&mut buf, traces).expect("infallible: the error is std::convert::Infallible"); + let _ = to_writer(&mut buf, traces); buf.into_vec() } @@ -158,7 +157,6 @@ pub fn to_vec_with_capacity]>>( /// ``` pub fn to_encoded_byte_len]>>(traces: &[S]) -> u32 { let mut counter = super::CountLength(0); - #[allow(clippy::expect_used)] - to_writer(&mut counter, traces).expect("infallible: CountLength never fails"); + let _ = to_writer(&mut counter, traces); counter.0 } diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index 0837e21bf9..f1e5e6da08 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -7,7 +7,7 @@ use crate::span::v05::dict::SharedDict; use crate::span::{v05, TraceData}; pub use crate::tracer_header_tags::TracerHeaderTags; use crate::tracer_payload::TracerPayloadCollection; -use crate::tracer_payload::{self, TraceChunks}; +use crate::tracer_payload::{self, TraceChunks, TraceEncoding}; use anyhow::anyhow; use bytes::buf::Reader; use bytes::Buf; @@ -592,26 +592,26 @@ pub fn enrich_span_with_azure_function_metadata(span: &mut pb::Span) { pub fn collect_trace_chunks( traces: Vec>>, - use_v05_format: bool, + format: TraceEncoding, ) -> anyhow::Result> { - if use_v05_format { - let mut shared_dict = SharedDict::default(); - let mut v05_traces: Vec> = Vec::with_capacity(traces.len()); - for trace in traces { - let trace_len = trace.len(); - let v05_trace = trace.into_iter().try_fold( - Vec::with_capacity(trace_len), - |mut acc, span| -> anyhow::Result> { - acc.push(v05::from_v04_span(span, &mut shared_dict)?); - Ok(acc) - }, - )?; - - v05_traces.push(v05_trace); + match format { + TraceEncoding::V05 => { + let mut shared_dict = SharedDict::default(); + let mut v05_traces: Vec> = Vec::with_capacity(traces.len()); + for trace in traces { + let trace_len = trace.len(); + let v05_trace = trace.into_iter().try_fold( + Vec::with_capacity(trace_len), + |mut acc, span| -> anyhow::Result> { + acc.push(v05::from_v04_span(span, &mut shared_dict)?); + Ok(acc) + }, + )?; + v05_traces.push(v05_trace); + } + Ok(TraceChunks::V05((shared_dict, v05_traces))) } - Ok(TraceChunks::V05((shared_dict, v05_traces))) - } else { - Ok(TraceChunks::V04(traces)) + TraceEncoding::V04 => Ok(TraceChunks::V04(traces)), } } @@ -1132,7 +1132,7 @@ mod tests { fn test_collect_trace_chunks_v05() { let chunk = vec![create_test_no_alloc_span(123, 456, 789, 1, true)]; - let collection = collect_trace_chunks(vec![chunk], true).unwrap(); + let collection = collect_trace_chunks(vec![chunk], TraceEncoding::V05).unwrap(); let (dict, traces) = match collection { TraceChunks::V05(payload) => payload, diff --git a/libdd-trace-utils/src/tracer_payload.rs b/libdd-trace-utils/src/tracer_payload.rs index 79e603ba75..37ac1a524d 100644 --- a/libdd-trace-utils/src/tracer_payload.rs +++ b/libdd-trace-utils/src/tracer_payload.rs @@ -228,16 +228,13 @@ pub fn decode_to_trace_chunks( data: libdd_tinybytes::Bytes, encoding_type: TraceEncoding, ) -> Result<(TraceChunks, usize), anyhow::Error> { - let (data, size) = match encoding_type { + let (data, size) = match &encoding_type { TraceEncoding::V04 => msgpack_decoder::v04::from_bytes(data), TraceEncoding::V05 => msgpack_decoder::v05::from_bytes(data), } .map_err(|e| anyhow::format_err!("Error deserializing trace from request body: {e}"))?; - Ok(( - collect_trace_chunks(data, matches!(encoding_type, TraceEncoding::V05))?, - size, - )) + Ok((collect_trace_chunks(data, encoding_type)?, size)) } #[cfg(test)] From f9e935a6d990986e7c8b5bce493d5a453c087640 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Fri, 22 May 2026 13:36:38 +0200 Subject: [PATCH 2/4] fix: address comments --- libdd-common/src/lib.rs | 34 +++++++++++++++++++ .../src/trace_exporter/trace_serializer.rs | 11 +++--- libdd-trace-utils/src/msgpack_encoder/mod.rs | 15 ++++++++ .../src/msgpack_encoder/v04/mod.rs | 12 ++++++- .../src/msgpack_encoder/v1/mod.rs | 14 ++++++-- libdd-trace-utils/src/trace_utils.rs | 33 +++++++++++++----- libdd-trace-utils/src/tracer_payload.rs | 4 +-- 7 files changed, 105 insertions(+), 18 deletions(-) diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index 83e913896e..dbd2e4a090 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -135,6 +135,40 @@ impl RwLockExt for RwLock { } } +/// Extension trait that extracts the value from a `Result` whose error type is uninhabited. +/// +/// The signature constrains callers at compile time: the method is only available when the +/// error type is [`core::convert::Infallible`]. No panics — the compiler proves the `Err` +/// arm unreachable from the type. +/// +/// # Examples +/// +/// ``` +/// use libdd_common::ResultInfallibleExt; +/// use std::convert::Infallible; +/// +/// let result: Result = Ok(42); +/// assert_eq!(result.unwrap_infallible(), 42); +/// ``` +pub trait ResultInfallibleExt: sealed::Sealed { + fn unwrap_infallible(self) -> T; +} + +impl ResultInfallibleExt for Result { + #[inline(always)] + fn unwrap_infallible(self) -> T { + match self { + Ok(value) => value, + Err(never) => match never {}, + } + } +} + +mod sealed { + pub trait Sealed {} + impl Sealed for Result {} +} + pub mod header { #![allow(clippy::declare_interior_mutable_const)] use http::{header::HeaderName, HeaderValue}; diff --git a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs index d0aaf98b35..e54af5f9d6 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs @@ -74,16 +74,17 @@ impl TraceSerializer { &self, traces: Vec>>, ) -> Result, TraceExporterError> { - let map_err = |e: anyhow::Error| { - TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) - }; match self.output_format { TraceExporterOutputFormat::V1 => Ok(tracer_payload::TraceChunks::V1(traces)), TraceExporterOutputFormat::V04 => { - trace_utils::collect_trace_chunks(traces, TraceEncoding::V04).map_err(map_err) + trace_utils::collect_trace_chunks(traces, TraceEncoding::V04).map_err(|e| { + TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) + }) } TraceExporterOutputFormat::V05 => { - trace_utils::collect_trace_chunks(traces, TraceEncoding::V05).map_err(map_err) + trace_utils::collect_trace_chunks(traces, TraceEncoding::V05).map_err(|e| { + TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) + }) } } } diff --git a/libdd-trace-utils/src/msgpack_encoder/mod.rs b/libdd-trace-utils/src/msgpack_encoder/mod.rs index 06898a33eb..2d54a03349 100644 --- a/libdd-trace-utils/src/msgpack_encoder/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/mod.rs @@ -4,6 +4,21 @@ pub mod v04; pub mod v1; +use rmp::encode::ValueWriteError; +use std::convert::Infallible; + +/// Flatten `ValueWriteError` (uninhabited because both variants wrap +/// `Infallible`) into the bare `Infallible` so callers can use +/// [`libdd_common::ResultInfallibleExt`]. +#[inline(always)] +pub(crate) fn flatten_value_write_infallible(err: ValueWriteError) -> Infallible { + match err { + ValueWriteError::InvalidMarkerWrite(never) | ValueWriteError::InvalidDataWrite(never) => { + never + } + } +} + /// A writer that counts bytes without storing them, used to compute encoded payload size. pub(crate) struct CountLength(u32); diff --git a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs index 5eeffb34a7..f216e28c37 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs @@ -3,6 +3,7 @@ use crate::span::v04::Span; use crate::span::TraceData; +use libdd_common::ResultInfallibleExt; use rmp::encode::{write_array_len, ByteBuf, RmpWrite, ValueWriteError}; mod span; @@ -123,7 +124,11 @@ pub fn to_vec_with_capacity]>>( capacity: u32, ) -> Vec { let mut buf = ByteBuf::with_capacity(capacity as usize); - let _ = to_writer(&mut buf, traces); + // `ByteBuf`'s `RmpWrite::Error` is `Infallible`, so `to_writer` cannot fail. The compiler + // proves the `Err` arm unreachable through `unwrap_infallible`. + to_writer(&mut buf, traces) + .map_err(super::flatten_value_write_infallible) + .unwrap_infallible(); buf.into_vec() } @@ -157,6 +162,11 @@ pub fn to_vec_with_capacity]>>( /// ``` pub fn to_encoded_byte_len]>>(traces: &[S]) -> u32 { let mut counter = super::CountLength(0); + // `CountLength` impls `std::io::Write` (whose error type is `std::io::Error`, not + // `Infallible`), so we can't statically prove infallibility via `unwrap_infallible` + // the way we do for `ByteBuf`. In practice `CountLength::write*` only ever return + // `Ok`, so the error path here is unreachable today; should `CountLength` ever grow + // a fallible code path, fuzz tests on the msgpack encoded length would catch it. let _ = to_writer(&mut counter, traces); counter.0 } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 86cef2e027..479a908729 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -6,6 +6,7 @@ mod span_v04; use crate::span::v04::Span; use crate::span::TraceData; use crate::tracer_metadata::TracerMetadata; +use libdd_common::ResultInfallibleExt; use rmp::encode::{ write_array_len, write_bin, write_map_len, write_sint, write_str, write_uint, write_uint8, ByteBuf, RmpWrite, ValueWriteError, @@ -399,7 +400,11 @@ pub fn to_vec_with_capacity]>>( metadata: &TracerMetadata, ) -> Vec { let mut buf = ByteBuf::with_capacity(capacity as usize); - let _ = encode_payload(&mut buf, traces, metadata); // infallible: ByteBuf write never fails + // `ByteBuf`'s `RmpWrite::Error` is `Infallible`, so `encode_payload` cannot fail. The + // compiler proves the `Err` arm unreachable through `unwrap_infallible`. + encode_payload(&mut buf, traces, metadata) + .map_err(super::flatten_value_write_infallible) + .unwrap_infallible(); buf.into_vec() } @@ -409,7 +414,12 @@ pub fn to_encoded_byte_len]>>( metadata: &TracerMetadata, ) -> u32 { let mut counter = super::CountLength(0); - let _ = encode_payload(&mut counter, traces, metadata); // infallible: CountLength write never fails + // `CountLength` impls `std::io::Write` (whose error type is `std::io::Error`, not + // `Infallible`), so we can't statically prove infallibility via `unwrap_infallible` + // the way we do for `ByteBuf`. In practice `CountLength::write*` only ever return + // `Ok`, so the error path here is unreachable today; should `CountLength` ever grow + // a fallible code path, fuzz tests on the msgpack encoded length would catch it. + let _ = encode_payload(&mut counter, traces, metadata); counter.0 } diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index f1e5e6da08..3cd53b04d7 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -599,14 +599,10 @@ pub fn collect_trace_chunks( let mut shared_dict = SharedDict::default(); let mut v05_traces: Vec> = Vec::with_capacity(traces.len()); for trace in traces { - let trace_len = trace.len(); - let v05_trace = trace.into_iter().try_fold( - Vec::with_capacity(trace_len), - |mut acc, span| -> anyhow::Result> { - acc.push(v05::from_v04_span(span, &mut shared_dict)?); - Ok(acc) - }, - )?; + let v05_trace = trace + .into_iter() + .map(|span| v05::from_v04_span(span, &mut shared_dict)) + .collect::>>()?; v05_traces.push(v05_trace); } Ok(TraceChunks::V05((shared_dict, v05_traces))) @@ -1203,6 +1199,27 @@ mod tests { ); } + #[test] + fn test_collect_trace_chunks_v04() { + let chunk = vec![create_test_no_alloc_span(123, 456, 789, 1, true)]; + + let collection = collect_trace_chunks(vec![chunk], TraceEncoding::V04).unwrap(); + + let traces = match collection { + TraceChunks::V04(traces) => traces, + _ => panic!("Unexpected type"), + }; + + assert_eq!(traces.len(), 1); + assert_eq!(traces[0].len(), 1); + let span = &traces[0][0]; + assert_eq!(span.trace_id, 123); + assert_eq!(span.span_id, 456); + assert_eq!(span.parent_id, 789); + assert_eq!(span.start, 1); + assert_eq!(span.error, 0); + } + #[test] fn test_rmp_serde_deserialize_meta_with_null_values() { // Create a JSON representation with null value in meta diff --git a/libdd-trace-utils/src/tracer_payload.rs b/libdd-trace-utils/src/tracer_payload.rs index 37ac1a524d..e30f04df91 100644 --- a/libdd-trace-utils/src/tracer_payload.rs +++ b/libdd-trace-utils/src/tracer_payload.rs @@ -12,7 +12,7 @@ use std::iter::Iterator; pub type TracerPayloadV04 = Vec; pub type TracerPayloadV05 = Vec; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] /// Enumerates the different encoding types. pub enum TraceEncoding { /// v0.4 encoding (TracerPayloadV04). @@ -228,7 +228,7 @@ pub fn decode_to_trace_chunks( data: libdd_tinybytes::Bytes, encoding_type: TraceEncoding, ) -> Result<(TraceChunks, usize), anyhow::Error> { - let (data, size) = match &encoding_type { + let (data, size) = match encoding_type { TraceEncoding::V04 => msgpack_decoder::v04::from_bytes(data), TraceEncoding::V05 => msgpack_decoder::v05::from_bytes(data), } From 0656b67cbe9d4938c33fb13e19399b67f577a806 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 26 May 2026 14:05:54 +0200 Subject: [PATCH 3/4] feat(trace-exporter): add v1 span and its encoder --- .../src/msgpack_encoder/v1/mod.rs | 1029 +++++++++++++++++ .../src/msgpack_encoder/v1/span_v04.rs | 2 - .../src/msgpack_encoder/v1/span_v1.rs | 271 +++++ libdd-trace-utils/src/span/mod.rs | 1 + libdd-trace-utils/src/span/v1/mod.rs | 311 +++++ 5 files changed, 1612 insertions(+), 2 deletions(-) create mode 100644 libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs create mode 100644 libdd-trace-utils/src/span/v1/mod.rs diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 479a908729..ca3a97600f 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -2,8 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 mod span_v04; +mod span_v1; use crate::span::v04::Span; +use crate::span::v1::TracerPayload; use crate::span::TraceData; use crate::tracer_metadata::TracerMetadata; use libdd_common::ResultInfallibleExt; @@ -423,6 +425,162 @@ pub fn to_encoded_byte_len]>>( counter.0 } +/// Encodes a [`TracerPayload`] (V1 canonical data model) as a V1 msgpack payload. +/// +/// This is the M3 encoder. The byte layout matches [`encode_payload`] (the M1 v0.4 → V1 +/// encoder) so equivalent inputs produce byte-identical outputs. +fn encode_payload_v1( + writer: &mut W, + payload: &TracerPayload, +) -> Result<(), ValueWriteError> { + let mut table = StringTable::new(); + + let has_attributes = !payload.attributes.is_empty(); + + let map_len = 1u32 // chunks always present + + (!payload.language_name.borrow().is_empty()) as u32 + + (!payload.language_version.borrow().is_empty()) as u32 + + (!payload.tracer_version.borrow().is_empty()) as u32 + + (!payload.runtime_id.borrow().is_empty()) as u32 + + (!payload.env.borrow().is_empty()) as u32 + + (!payload.hostname.borrow().is_empty()) as u32 + + (!payload.app_version.borrow().is_empty()) as u32 + + has_attributes as u32; + + write_map_len(writer, map_len)?; + + if !payload.language_name.borrow().is_empty() { + write_uint8(writer, trace_key::LANGUAGE_NAME)?; + table.write_interned(writer, payload.language_name.borrow())?; + } + + if !payload.language_version.borrow().is_empty() { + write_uint8(writer, trace_key::LANGUAGE_VERSION)?; + table.write_interned(writer, payload.language_version.borrow())?; + } + + if !payload.tracer_version.borrow().is_empty() { + write_uint8(writer, trace_key::TRACER_VERSION)?; + table.write_interned(writer, payload.tracer_version.borrow())?; + } + + if !payload.runtime_id.borrow().is_empty() { + write_uint8(writer, trace_key::RUNTIME_ID)?; + table.write_interned(writer, payload.runtime_id.borrow())?; + } + + if !payload.env.borrow().is_empty() { + write_uint8(writer, trace_key::ENV_REF)?; + table.write_interned(writer, payload.env.borrow())?; + } + + if !payload.hostname.borrow().is_empty() { + write_uint8(writer, trace_key::HOSTNAME_REF)?; + table.write_interned(writer, payload.hostname.borrow())?; + } + + if !payload.app_version.borrow().is_empty() { + write_uint8(writer, trace_key::APP_VERSION_REF)?; + table.write_interned(writer, payload.app_version.borrow())?; + } + + if has_attributes { + write_uint8(writer, trace_key::ATTRIBUTES)?; + span_v1::encode_attributes_map(writer, &payload.attributes, &mut table)?; + } + + write_uint8(writer, trace_key::CHUNKS)?; + write_array_len(writer, payload.chunks.len() as u32)?; + for chunk in &payload.chunks { + encode_chunk_v1(writer, chunk, &mut table)?; + } + + Ok(()) +} + +fn encode_chunk_v1( + writer: &mut W, + chunk: &crate::span::v1::TraceChunk, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + let has_origin = chunk + .origin + .as_ref() + .is_some_and(|o| !>::borrow(o).is_empty()); + + let fields = 2u32 // trace_id + spans + + has_origin as u32 + + chunk.priority.is_some() as u32 + + chunk.sampling_mechanism.is_some() as u32; + + write_map_len(writer, fields)?; + + write_uint8(writer, chunk_key::TRACE_ID)?; + write_bin(writer, &chunk.trace_id)?; + + if let Some(origin) = chunk + .origin + .as_ref() + .filter(|o| !>::borrow(o).is_empty()) + { + write_uint8(writer, chunk_key::ORIGIN)?; + table.write_interned(writer, >::borrow(origin))?; + } + + if let Some(priority) = chunk.priority { + write_uint8(writer, chunk_key::PRIORITY)?; + write_sint(writer, priority as i64)?; + } + + if let Some(mechanism) = chunk.sampling_mechanism { + write_uint8(writer, chunk_key::SAMPLING_MECHANISM)?; + write_uint(writer, mechanism as u64)?; + } + + write_uint8(writer, chunk_key::SPANS)?; + write_array_len(writer, chunk.spans.len() as u32)?; + for span in &chunk.spans { + span_v1::encode_span(writer, span, table)?; + } + + Ok(()) +} + +/// Serializes a V1 [`TracerPayload`] into a `Vec` using the V1 msgpack format. +pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec { + to_vec_from_payload_with_capacity(payload, 0) +} + +/// Serializes a V1 [`TracerPayload`] into a `Vec` with a pre-allocated capacity. +pub fn to_vec_from_payload_with_capacity( + payload: &TracerPayload, + capacity: u32, +) -> Vec { + let mut buf = ByteBuf::with_capacity(capacity as usize); + encode_payload_v1(&mut buf, payload) + .map_err(super::flatten_value_write_infallible) + .unwrap_infallible(); + buf.into_vec() +} + +/// Serializes a V1 [`TracerPayload`] into a caller-provided slice. +/// +/// # Errors +/// Returns a `ValueWriteError` if the underlying writer fails (e.g. buffer too small). +pub fn write_payload_to_slice( + slice: &mut &mut [u8], + payload: &TracerPayload, +) -> Result<(), ValueWriteError> { + encode_payload_v1(slice, payload) +} + +/// Returns the number of bytes the V1 payload for `payload` would occupy when encoded. +pub fn to_encoded_byte_len_from_payload(payload: &TracerPayload) -> u32 { + let mut counter = super::CountLength(0); + let _ = encode_payload_v1(&mut counter, payload); + counter.0 +} + #[cfg(test)] mod tests { use super::*; @@ -866,3 +1024,874 @@ mod tests { ); } } + +#[cfg(test)] +mod v1_payload_tests { + //! Unit tests for the M3 encoder (`encode_payload_v1`). + //! + //! Verifies the encoder produces a valid V1 payload from the canonical + //! [`crate::span::v1::TracerPayload`] data model and that core invariants (interning, byte + //! length, optional fields) hold. + + use super::*; + use crate::span::v1::{ + AttributeValue, Span as V1Span, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, + TracerPayloadBytes, + }; + use libdd_tinybytes::BytesString; + + fn bs(s: &str) -> BytesString { + BytesString::from_slice(s.as_bytes()).unwrap_or_default() + } + + fn make_span(service: &str, name: &str, span_id: u64) -> V1SpanBytes { + V1Span { + service: bs(service), + name: bs(name), + resource: bs("res"), + span_id, + start: 1_000_000, + duration: 500, + ..Default::default() + } + } + + fn make_chunk(spans: Vec, trace_id: [u8; 16]) -> TraceChunkBytes { + TraceChunkBytes { + trace_id, + spans, + ..Default::default() + } + } + + #[test] + fn empty_payload_is_valid_msgpack_map() { + let payload = TracerPayloadBytes::default(); + let encoded = to_vec_from_payload(&payload); + // Map with a single entry (chunks), then an empty array. `0x81` = fixmap of length 1, + // followed by chunk key (0x0b), then `0x90` (fixarray length 0). + assert_eq!(encoded, vec![0x81, 0x0b, 0x90]); + } + + #[test] + fn payload_byte_len_matches_to_vec() { + let chunk = make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16]); + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + let len = to_encoded_byte_len_from_payload(&payload); + assert_eq!(encoded.len() as u32, len); + } + + #[test] + fn span_kind_is_always_emitted_as_uint() { + // Default SpanKind (Internal=1) must be emitted. The encoded payload contains + // `kind_key (0x10) | uint 1 (0x01)`. + let chunk = make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16]); + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + let pat = [0x10u8, 0x01u8]; + assert!( + encoded.windows(2).any(|w| w == pat), + "Kind (key=16) Internal (=1) must be emitted" + ); + } + + #[test] + fn typed_attributes_carry_correct_type_discriminants() { + let mut attrs = HashMap::new(); + attrs.insert(bs("k_str"), AttributeValue::String(bs("v"))); + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + attributes: attrs, + ..Default::default() + }; + let chunk = make_chunk(vec![span], [0u8; 16]); + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // String attribute → type discriminant = 1 (`AnyValueKey::String`). + assert!( + encoded.windows(b"k_str".len()).any(|w| w == b"k_str"), + "attribute key must appear" + ); + } + + #[test] + fn bytes_attribute_uses_bin_marker() { + // A Bytes attribute must use the msgpack `bin` family, not `str`. + let mut attrs = HashMap::new(); + attrs.insert( + bs("payload"), + AttributeValue::Bytes(libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad")), + ); + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + attributes: attrs, + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![make_chunk(vec![span], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // bin8 marker `0xc4` followed by length `0x02` and the bytes themselves. + let want = [0xc4u8, 0x02, 0xde, 0xad]; + assert!( + encoded.windows(4).any(|w| w == want), + "Bytes attribute must be encoded as msgpack bin" + ); + } + + #[test] + fn list_and_keyvalue_attributes_round_trip_through_recursion() { + let mut nested = HashMap::new(); + nested.insert(bs("nk"), AttributeValue::Int(7)); + let mut attrs = HashMap::new(); + attrs.insert( + bs("list"), + AttributeValue::List(vec![ + AttributeValue::String(bs("a")), + AttributeValue::Bool(true), + ]), + ); + attrs.insert(bs("kv"), AttributeValue::KeyValue(nested)); + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + attributes: attrs, + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![make_chunk(vec![span], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // The keys and the nested key must all appear at least once. + for s in &[b"list" as &[u8], b"kv", b"a", b"nk"] { + assert!( + encoded.windows(s.len()).any(|w| w == *s), + "{} should appear in payload", + std::str::from_utf8(s).unwrap() + ); + } + } + + #[test] + fn promoted_fields_at_payload_level() { + let payload = TracerPayloadBytes { + language_name: bs("python"), + language_version: bs("3.11"), + tracer_version: bs("2.0.0"), + runtime_id: bs("rt-1"), + env: bs("prod"), + hostname: bs("h"), + app_version: bs("1.2.3"), + chunks: vec![make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + for s in &[ + b"python" as &[u8], + b"3.11", + b"2.0.0", + b"rt-1", + b"prod", + b"1.2.3", + ] { + assert!( + encoded.windows(s.len()).any(|w| w == *s), + "{} should appear", + std::str::from_utf8(s).unwrap() + ); + } + } + + #[test] + fn chunk_level_attrs_emitted_when_set() { + let chunk = TraceChunkBytes { + trace_id: [0u8; 16], + priority: Some(1), + origin: Some(bs("lambda")), + sampling_mechanism: Some(4), + spans: vec![make_span("svc", "op", 1)], + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + assert!( + encoded.windows(b"lambda".len()).any(|w| w == b"lambda"), + "chunk origin should appear" + ); + // sampling_mechanism=4 → SAMPLING_MECHANISM (0x07) + positive fixint 0x04 + let want = [chunk_key::SAMPLING_MECHANISM, 0x04]; + assert!(encoded.windows(2).any(|w| w == want)); + } + + #[test] + fn span_kind_otel_values() { + for (kind, expected_byte) in [ + (SpanKind::Internal, 0x01u8), + (SpanKind::Server, 0x02), + (SpanKind::Client, 0x03), + (SpanKind::Producer, 0x04), + (SpanKind::Consumer, 0x05), + ] { + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + span_kind: kind, + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![make_chunk(vec![span], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + let want = [0x10u8, expected_byte]; + assert!( + encoded.windows(2).any(|w| w == want), + "SpanKind {kind:?} should produce byte {expected_byte:#x}" + ); + } + } + + #[test] + fn string_interning_works_across_chunks() { + // The string "shared" appears in two chunks. The second occurrence must be a uint ID, + // not a fresh str. Compare against a baseline with a single occurrence to verify. + let chunk_with_two = TracerPayloadBytes { + chunks: vec![ + make_chunk(vec![make_span("shared", "op1", 1)], [0u8; 16]), + make_chunk(vec![make_span("shared", "op2", 2)], [0u8; 16]), + ], + ..Default::default() + }; + let single = TracerPayloadBytes { + chunks: vec![make_chunk(vec![make_span("shared", "op1", 1)], [0u8; 16])], + ..Default::default() + }; + let two = to_vec_from_payload(&chunk_with_two); + let one = to_vec_from_payload(&single); + assert!( + two.len() < 2 * one.len(), + "interning should reduce repeated payload size" + ); + } +} + +#[cfg(test)] +mod cross_validation_tests { + //! Cross-validates that the M1 encoder (v0.4 spans → V1 payload) and the M3 encoder + //! (v1::Span → V1 payload) produce **byte-identical** output for equivalent inputs. + //! + //! All tests are limited to deterministic content (at most one attribute key per map) so the + //! `HashMap` iteration order cannot diverge between the two inputs. + + use super::*; + use crate::span::v04::SpanBytes as V04Span; + use crate::span::v1::{ + AttributeValue, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, TracerPayloadBytes, + }; + use libdd_tinybytes::BytesString; + + fn bs(s: &str) -> BytesString { + BytesString::from_slice(s.as_bytes()).unwrap_or_default() + } + + /// Builds a 128-bit big-endian trace_id from `(high, low)` 64-bit halves. + fn tid_bytes(high: u64, low: u64) -> [u8; 16] { + let mut out = [0u8; 16]; + out[..8].copy_from_slice(&high.to_be_bytes()); + out[8..].copy_from_slice(&low.to_be_bytes()); + out + } + + /// Asserts that encoding `v04` (with `metadata`) via M1 produces the same bytes as + /// encoding `v1` via M3. Includes a hex-diff message on mismatch. + #[track_caller] + fn assert_byte_equal( + v04_traces: &[Vec], + metadata: &TracerMetadata, + v1_payload: &TracerPayloadBytes, + ) { + let m1 = to_vec(v04_traces, metadata); + let m3 = to_vec_from_payload(v1_payload); + if m1 != m3 { + panic!( + "M1 and M3 encoders diverged:\n M1 ({:3} bytes): {}\n M3 ({:3} bytes): {}", + m1.len(), + hex_dump(&m1), + m3.len(), + hex_dump(&m3) + ); + } + } + + fn hex_dump(b: &[u8]) -> String { + b.iter().map(|c| format!("{c:02x}")).collect::() + } + + #[test] + fn empty_payload_byte_identical() { + let v04: Vec> = vec![]; + let v1 = TracerPayloadBytes::default(); + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn minimal_single_span_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 0x42, + span_id: 1, + start: 1_000_000, + duration: 500, + ..Default::default() + }]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 0x42), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1_000_000, + duration: 500, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn span_with_parent_and_error_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 2, + parent_id: 1, + start: 1000, + duration: 100, + error: 1, + ..Default::default() + }]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 2, + parent_id: 1, + start: 1000, + duration: 100, + error: true, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn promoted_fields_byte_identical() { + // M1 reads env/version/component/span.kind from v04 meta and promotes them; M3 takes + // them directly from the v1::Span fields. Both must produce the same bytes. + let mut meta = HashMap::new(); + meta.insert(bs("env"), bs("prod")); + meta.insert(bs("version"), bs("1.2.3")); + meta.insert(bs("component"), bs("flask")); + meta.insert(bs("span.kind"), bs("server")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + // metadata.env populated → M1 picks env from metadata first (it's set on the builder). + let metadata = TracerMetadata { + env: "prod".to_string(), + app_version: "1.2.3".to_string(), + ..Default::default() + }; + + let v1 = TracerPayloadBytes { + env: bs("prod"), + app_version: bs("1.2.3"), + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + env: bs("prod"), + version: bs("1.2.3"), + component: bs("flask"), + span_kind: SpanKind::Server, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &metadata, &v1); + } + + #[test] + fn single_string_meta_attribute_byte_identical() { + // One non-promoted meta tag → one attribute triplet. With a single entry the HashMap + // iteration order cannot vary. + let mut meta = HashMap::new(); + meta.insert(bs("custom.tag"), bs("hello")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert(bs("custom.tag"), AttributeValue::String(bs("hello"))); + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn single_float_metric_byte_identical() { + let mut metrics = HashMap::new(); + metrics.insert(bs("score"), 1.5f64); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + metrics, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert(bs("score"), AttributeValue::Float(1.5)); + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn single_bytes_meta_struct_byte_identical() { + let mut meta_struct = HashMap::new(); + meta_struct.insert( + bs("payload"), + libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad\xbe\xef"), + ); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta_struct, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert( + bs("payload"), + AttributeValue::Bytes(libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad\xbe\xef")), + ); + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn chunk_origin_only_byte_identical() { + // The M1 encoder's `is_promoted` filter only strips env/version/component/span.kind/ + // _dd.p.tid — it intentionally keeps `_dd.origin` in span attributes even though it's + // also lifted to the chunk. M3 must reproduce that duplication for byte equality. + let mut meta = HashMap::new(); + meta.insert(bs("_dd.origin"), bs("lambda")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert(bs("_dd.origin"), AttributeValue::String(bs("lambda"))); + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + origin: Some(bs("lambda")), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn trace_id_128_bit_from_dd_p_tid_byte_identical() { + let mut meta = HashMap::new(); + meta.insert(bs("_dd.p.tid"), bs("640cfd5400000000")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 0x0123456789abcdef, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0x640cfd5400000000, 0x0123456789abcdef), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn tracer_metadata_fields_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }]]; + let metadata = TracerMetadata { + language: "python".to_string(), + language_version: "3.11".to_string(), + tracer_version: "2.0.0".to_string(), + runtime_id: "abc-uuid".to_string(), + hostname: "h1".to_string(), + ..Default::default() + }; + + let v1 = TracerPayloadBytes { + language_name: bs("python"), + language_version: bs("3.11"), + tracer_version: bs("2.0.0"), + runtime_id: bs("abc-uuid"), + hostname: bs("h1"), + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &metadata, &v1); + } + + #[test] + fn payload_attribute_git_commit_sha_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }]]; + let metadata = TracerMetadata { + git_commit_sha: "abc123".to_string(), + ..Default::default() + }; + + let mut payload_attrs = HashMap::new(); + payload_attrs.insert( + bs("_dd.git.commit.sha"), + AttributeValue::String(bs("abc123")), + ); + + let v1 = TracerPayloadBytes { + attributes: payload_attrs, + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &metadata, &v1); + } + + #[test] + fn span_with_single_link_byte_identical() { + let v04_span = V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + span_links: vec![crate::span::v04::SpanLink { + trace_id: 0x0123456789abcdef, + trace_id_high: 0, + span_id: 99, + tracestate: bs("running"), + flags: 0, + attributes: HashMap::new(), + }], + ..Default::default() + }; + let v04 = vec![vec![v04_span]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + span_links: vec![crate::span::v1::SpanLinkBytes { + trace_id: tid_bytes(0, 0x0123456789abcdef), + span_id: 99, + tracestate: bs("running"), + flags: 0, + attributes: HashMap::new(), + }], + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn span_with_single_event_byte_identical() { + use crate::span::v04::{AttributeAnyValue, AttributeArrayValue}; + + let v04_span = V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + span_events: vec![crate::span::v04::SpanEvent { + time_unix_nano: 42, + name: bs("exception"), + attributes: HashMap::from([( + bs("exception.message"), + AttributeAnyValue::SingleValue(AttributeArrayValue::String(bs("boom"))), + )]), + }], + ..Default::default() + }; + let v04 = vec![vec![v04_span]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + span_events: vec![crate::span::v1::SpanEventBytes { + time_unix_nano: 42, + name: bs("exception"), + attributes: HashMap::from([( + bs("exception.message"), + AttributeValue::String(bs("boom")), + )]), + }], + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } +} diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs index 2c5962f8f1..022f80a7af 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs @@ -61,8 +61,6 @@ pub(super) enum AnyValueKey { Int64 = 4, Bytes = 5, Array = 6, - /// Not used in V04→V1 conversion (V04 has no key-value list type), defined for completeness. - #[allow(dead_code)] KeyValueList = 7, } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs new file mode 100644 index 0000000000..e7af11fd84 --- /dev/null +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -0,0 +1,271 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! V1 msgpack encoder that consumes the canonical [`crate::span::v1`] data model. +//! +//! Mirrors the encoder in [`super::span_v04`] but takes pre-promoted fields directly from the +//! struct instead of extracting them from v0.4 meta. The byte layout is identical so payloads +//! produced from equivalent inputs by either encoder are byte-for-byte equal — see the +//! cross-validation tests in [`super::cross_validation_tests`]. + +use crate::span::v1::{AttributeValue, Span, SpanEvent, SpanLink}; +use crate::span::TraceData; +use rmp::encode::{ + write_array_len, write_bin, write_bool, write_f64, write_map_len, write_sint, write_u64, + write_uint, write_uint8, RmpWrite, ValueWriteError, +}; +use std::borrow::Borrow; + +use super::span_v04::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey}; +use super::StringTable; + +/// Encodes a typed [`AttributeValue`] as `[type_uint8, value]`. +/// +/// Recursive: [`AttributeValue::List`] and [`AttributeValue::KeyValue`] contain nested +/// [`AttributeValue`]s that are encoded in the same `[type, value]` shape. +pub(super) fn encode_attribute_value( + writer: &mut W, + value: &AttributeValue, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + match value { + AttributeValue::String(s) => { + write_uint8(writer, AnyValueKey::String as u8)?; + table.write_interned(writer, s.borrow())?; + } + AttributeValue::Bool(b) => { + write_uint8(writer, AnyValueKey::Bool as u8)?; + write_bool(writer, *b).map_err(ValueWriteError::InvalidDataWrite)?; + } + AttributeValue::Float(f) => { + write_uint8(writer, AnyValueKey::Double as u8)?; + write_f64(writer, *f)?; + } + AttributeValue::Int(i) => { + write_uint8(writer, AnyValueKey::Int64 as u8)?; + write_sint(writer, *i)?; + } + AttributeValue::Bytes(b) => { + write_uint8(writer, AnyValueKey::Bytes as u8)?; + write_bin(writer, b.borrow())?; + } + AttributeValue::List(arr) => { + write_uint8(writer, AnyValueKey::Array as u8)?; + write_array_len(writer, arr.len() as u32)?; + for v in arr { + encode_attribute_value(writer, v, table)?; + } + } + AttributeValue::KeyValue(map) => { + write_uint8(writer, AnyValueKey::KeyValueList as u8)?; + write_map_len(writer, map.len() as u32)?; + for (k, v) in map { + table.write_interned(writer, k.borrow())?; + encode_attribute_value(writer, v, table)?; + } + } + } + Ok(()) +} + +/// Encodes a flat triplet attributes array: `[key, type_uint8, value, key, type_uint8, value, ...]`. +/// +/// The array length is `3 * map.len()` per the V1 wire format. +pub(super) fn encode_attributes_map( + writer: &mut W, + map: &std::collections::HashMap>, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + write_array_len(writer, (map.len() as u32) * 3)?; + for (k, v) in map { + table.write_interned(writer, k.borrow())?; + encode_attribute_value(writer, v, table)?; + } + Ok(()) +} + +/// Encodes span links from the V1 data model. +pub(super) fn encode_span_links( + writer: &mut W, + span_links: &[SpanLink], + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + write_uint8(writer, SpanKey::SpanLinks as u8)?; + write_array_len(writer, span_links.len() as u32)?; + + for link in span_links { + let link_len = 1 // trace_id (always) + + (link.span_id != 0) as u32 + + (!link.attributes.is_empty()) as u32 + + (!link.tracestate.borrow().is_empty()) as u32 + + (link.flags != 0) as u32; + + write_map_len(writer, link_len)?; + + write_uint8(writer, SpanLinkKey::TraceId as u8)?; + write_bin(writer, &link.trace_id)?; + + if link.span_id != 0 { + write_uint8(writer, SpanLinkKey::SpanId as u8)?; + write_u64(writer, link.span_id)?; + } + + if !link.attributes.is_empty() { + write_uint8(writer, SpanLinkKey::Attributes as u8)?; + encode_attributes_map(writer, &link.attributes, table)?; + } + + if !link.tracestate.borrow().is_empty() { + write_uint8(writer, SpanLinkKey::TraceState as u8)?; + table.write_interned(writer, link.tracestate.borrow())?; + } + + if link.flags != 0 { + write_uint8(writer, SpanLinkKey::Flags as u8)?; + write_uint(writer, link.flags as u64)?; + } + } + + Ok(()) +} + +/// Encodes span events from the V1 data model. +pub(super) fn encode_span_events( + writer: &mut W, + span_events: &[SpanEvent], + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + write_uint8(writer, SpanKey::SpanEvents as u8)?; + write_array_len(writer, span_events.len() as u32)?; + + for event in span_events { + let event_len = 2 // time + name + + (!event.attributes.is_empty()) as u32; + + write_map_len(writer, event_len)?; + + write_uint8(writer, SpanEventKey::Time as u8)?; + write_u64(writer, event.time_unix_nano)?; + + write_uint8(writer, SpanEventKey::Name as u8)?; + table.write_interned(writer, event.name.borrow())?; + + if !event.attributes.is_empty() { + write_uint8(writer, SpanEventKey::Attributes as u8)?; + encode_attributes_map(writer, &event.attributes, table)?; + } + } + + Ok(()) +} + +/// Encodes a [`Span`] (V1 data model) into V1 msgpack. +/// +/// Field-write order matches [`super::span_v04::encode_span`] so equivalent inputs produce +/// byte-identical output across the two encoders. +pub(super) fn encode_span( + writer: &mut W, + span: &Span, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + let is_parent = span.parent_id != 0; + let has_duration = span.duration != 0; + let has_error = span.error; + let has_attributes = !span.attributes.is_empty(); + let has_env = !span.env.borrow().is_empty(); + let has_version = !span.version.borrow().is_empty(); + let has_component = !span.component.borrow().is_empty(); + + let span_len = 3 // span_id, start, kind — always present + + (!span.service.borrow().is_empty()) as u32 + + (!span.name.borrow().is_empty()) as u32 + + (!span.resource.borrow().is_empty()) as u32 + + (!span.r#type.borrow().is_empty()) as u32 + + is_parent as u32 + + has_duration as u32 + + has_error as u32 + + has_attributes as u32 + + (!span.span_links.is_empty()) as u32 + + (!span.span_events.is_empty()) as u32 + + has_env as u32 + + has_version as u32 + + has_component as u32; + + write_map_len(writer, span_len)?; + + if !span.service.borrow().is_empty() { + write_uint8(writer, SpanKey::Service as u8)?; + table.write_interned(writer, span.service.borrow())?; + } + + if !span.name.borrow().is_empty() { + write_uint8(writer, SpanKey::Name as u8)?; + table.write_interned(writer, span.name.borrow())?; + } + + if !span.resource.borrow().is_empty() { + write_uint8(writer, SpanKey::Resource as u8)?; + table.write_interned(writer, span.resource.borrow())?; + } + + write_uint8(writer, SpanKey::SpanId as u8)?; + write_u64(writer, span.span_id)?; + + write_uint8(writer, SpanKey::Start as u8)?; + // V1 normalization is the producer's responsibility: a negative `start` is not expected in + // the canonical data model. Cast preserves bits — callers that need wall-clock substitution + // should perform it before constructing the v1::Span. + write_u64(writer, span.start as u64)?; + + if is_parent { + write_uint8(writer, SpanKey::ParentId as u8)?; + write_u64(writer, span.parent_id)?; + } + + if has_duration { + write_uint8(writer, SpanKey::Duration as u8)?; + // Same rationale as for `start`: V1 inputs are expected to be normalized. + write_u64(writer, span.duration.max(0) as u64)?; + } + + if has_error { + write_uint8(writer, SpanKey::Error as u8)?; + write_bool(writer, true).map_err(ValueWriteError::InvalidDataWrite)?; + } + + if !span.r#type.borrow().is_empty() { + write_uint8(writer, SpanKey::Type as u8)?; + table.write_interned(writer, span.r#type.borrow())?; + } + + if has_attributes { + write_uint8(writer, SpanKey::Attributes as u8)?; + encode_attributes_map(writer, &span.attributes, table)?; + } + + if !span.span_links.is_empty() { + encode_span_links(writer, &span.span_links, table)?; + } + + if !span.span_events.is_empty() { + encode_span_events(writer, &span.span_events, table)?; + } + + if has_env { + write_uint8(writer, SpanKey::Env as u8)?; + table.write_interned(writer, span.env.borrow())?; + } + if has_version { + write_uint8(writer, SpanKey::Version as u8)?; + table.write_interned(writer, span.version.borrow())?; + } + if has_component { + write_uint8(writer, SpanKey::Component as u8)?; + table.write_interned(writer, span.component.borrow())?; + } + // SpanKind is always emitted per OTEL spec (default = Internal = 1). + write_uint8(writer, SpanKey::Kind as u8)?; + write_uint(writer, span.span_kind as u64)?; + + Ok(()) +} diff --git a/libdd-trace-utils/src/span/mod.rs b/libdd-trace-utils/src/span/mod.rs index e6358dfc7a..ceb68e5b53 100644 --- a/libdd-trace-utils/src/span/mod.rs +++ b/libdd-trace-utils/src/span/mod.rs @@ -4,6 +4,7 @@ pub mod trace_utils; pub mod v04; pub mod v05; +pub mod v1; use crate::msgpack_decoder::decode::buffer::read_string_ref_nomut; use crate::msgpack_decoder::decode::error::DecodeError; diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs new file mode 100644 index 0000000000..993aab7b05 --- /dev/null +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -0,0 +1,311 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Canonical internal representation of a V1 trace. +//! +//! See the design doc and `RFC: Efficient Trace Payload Protocol`. Compared to v0.4, V1: +//! - promotes `env`, `version`, `component`, and `span.kind` out of the meta map into dedicated +//! span fields; +//! - merges `meta`, `metrics`, and `meta_struct` into a single typed [`AttributeValue`] map; +//! - represents `error` as `bool` and `trace_id` as a 128-bit big-endian byte array carried at the +//! chunk level. + +use crate::span::{BytesData, SliceData, TraceData}; +use std::collections::HashMap; + +/// OpenTelemetry SpanKind values, encoded on the wire as a `uint32`. +/// +/// Unset / unknown kinds default to [`SpanKind::Internal`] to match the OTEL spec and the agent's +/// behavior in `pkg/trace/api/converter.go`. +#[repr(u32)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SpanKind { + #[default] + Internal = 1, + Server = 2, + Client = 3, + Producer = 4, + Consumer = 5, +} + +impl SpanKind { + /// Parses the legacy v0.4 `span.kind` meta string into a [`SpanKind`]. + /// + /// Unrecognized values map to [`SpanKind::Internal`] per OTEL semantics. This is the + /// infallible counterpart to [`FromStr::from_str`]: callers converting from v0.4 always have a + /// well-defined SpanKind, even if the upstream tag is missing or invalid. + pub fn from_meta(s: &str) -> Self { + match s { + "server" => SpanKind::Server, + "client" => SpanKind::Client, + "producer" => SpanKind::Producer, + "consumer" => SpanKind::Consumer, + _ => SpanKind::Internal, + } + } +} + +/// Typed V1 attribute value. +/// +/// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. The byte layout on the wire is a +/// `(key, type_uint8, value)` triplet — see `msgpack_encoder::v1::span_v1`. +#[derive(Debug, PartialEq)] +pub enum AttributeValue { + String(T::Text), + Float(f64), + Int(i64), + Bool(bool), + Bytes(T::Bytes), + KeyValue(HashMap>), + List(Vec>), +} + +impl Clone for AttributeValue +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + match self { + AttributeValue::String(v) => AttributeValue::String(v.clone()), + AttributeValue::Float(v) => AttributeValue::Float(*v), + AttributeValue::Int(v) => AttributeValue::Int(*v), + AttributeValue::Bool(v) => AttributeValue::Bool(*v), + AttributeValue::Bytes(v) => AttributeValue::Bytes(v.clone()), + AttributeValue::KeyValue(m) => AttributeValue::KeyValue(m.clone()), + AttributeValue::List(v) => AttributeValue::List(v.clone()), + } + } +} + +/// Canonical V1 span model. +/// +/// Generic over [`TraceData`] so the same type can be used with owned (`BytesData`) or borrowed +/// (`SliceData`) string buffers — matching the v0.4 [`crate::span::v04::Span`] pattern. +#[derive(Debug, PartialEq, Default)] +pub struct Span { + pub service: T::Text, + pub name: T::Text, + pub resource: T::Text, + pub r#type: T::Text, + /// 128-bit trace ID stored as big-endian bytes. Wire-level trace ID lives at the chunk; the + /// per-span copy lets callers route a span to its chunk without scanning siblings. + pub trace_id: [u8; 16], + pub span_id: u64, + pub parent_id: u64, + pub start: i64, + pub duration: i64, + pub error: bool, + pub span_kind: SpanKind, + pub env: T::Text, + pub version: T::Text, + pub component: T::Text, + pub attributes: HashMap>, + pub span_links: Vec>, + pub span_events: Vec>, +} + +impl Clone for Span +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + Span { + service: self.service.clone(), + name: self.name.clone(), + resource: self.resource.clone(), + r#type: self.r#type.clone(), + trace_id: self.trace_id, + span_id: self.span_id, + parent_id: self.parent_id, + start: self.start, + duration: self.duration, + error: self.error, + span_kind: self.span_kind, + env: self.env.clone(), + version: self.version.clone(), + component: self.component.clone(), + attributes: self.attributes.clone(), + span_links: self.span_links.clone(), + span_events: self.span_events.clone(), + } + } +} + +/// V1 span link. The 128-bit linked trace ID is stored in big-endian bytes. +#[derive(Debug, PartialEq, Default)] +pub struct SpanLink { + pub trace_id: [u8; 16], + pub span_id: u64, + pub attributes: HashMap>, + pub tracestate: T::Text, + pub flags: u32, +} + +impl Clone for SpanLink +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + SpanLink { + trace_id: self.trace_id, + span_id: self.span_id, + attributes: self.attributes.clone(), + tracestate: self.tracestate.clone(), + flags: self.flags, + } + } +} + +/// V1 span event. +#[derive(Debug, PartialEq, Default)] +pub struct SpanEvent { + pub time_unix_nano: u64, + pub name: T::Text, + pub attributes: HashMap>, +} + +impl Clone for SpanEvent +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + SpanEvent { + time_unix_nano: self.time_unix_nano, + name: self.name.clone(), + attributes: self.attributes.clone(), + } + } +} + +/// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata +/// promoted out of span meta (priority, origin, sampling mechanism). +#[derive(Debug, PartialEq, Default)] +pub struct TraceChunk { + pub trace_id: [u8; 16], + pub priority: Option, + pub origin: Option, + pub sampling_mechanism: Option, + pub dropped_trace: bool, + pub attributes: HashMap>, + pub spans: Vec>, +} + +impl Clone for TraceChunk +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + TraceChunk { + trace_id: self.trace_id, + priority: self.priority, + origin: self.origin.clone(), + sampling_mechanism: self.sampling_mechanism, + dropped_trace: self.dropped_trace, + attributes: self.attributes.clone(), + spans: self.spans.clone(), + } + } +} + +/// A V1 tracer payload: tracer-level metadata and the list of trace chunks it carries. +#[derive(Debug, PartialEq, Default)] +pub struct TracerPayload { + pub language_name: T::Text, + pub language_version: T::Text, + pub tracer_version: T::Text, + pub runtime_id: T::Text, + pub env: T::Text, + pub hostname: T::Text, + pub app_version: T::Text, + pub attributes: HashMap>, + pub chunks: Vec>, +} + +impl Clone for TracerPayload +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + TracerPayload { + language_name: self.language_name.clone(), + language_version: self.language_version.clone(), + tracer_version: self.tracer_version.clone(), + runtime_id: self.runtime_id.clone(), + env: self.env.clone(), + hostname: self.hostname.clone(), + app_version: self.app_version.clone(), + attributes: self.attributes.clone(), + chunks: self.chunks.clone(), + } + } +} + +pub type SpanBytes = Span; +pub type SpanLinkBytes = SpanLink; +pub type SpanEventBytes = SpanEvent; +pub type AttributeValueBytes = AttributeValue; +pub type TraceChunkBytes = TraceChunk; +pub type TracerPayloadBytes = TracerPayload; + +pub type SpanSlice<'a> = Span>; +pub type SpanLinkSlice<'a> = SpanLink>; +pub type SpanEventSlice<'a> = SpanEvent>; +pub type AttributeValueSlice<'a> = AttributeValue>; +pub type TraceChunkSlice<'a> = TraceChunk>; +pub type TracerPayloadSlice<'a> = TracerPayload>; + +#[cfg(test)] +mod tests { + use super::*; + use libdd_tinybytes::BytesString; + + #[test] + fn span_kind_default_is_internal() { + assert_eq!(SpanKind::default(), SpanKind::Internal); + } + + #[test] + fn span_kind_from_meta() { + assert_eq!(SpanKind::from_meta("server"), SpanKind::Server); + assert_eq!(SpanKind::from_meta("client"), SpanKind::Client); + assert_eq!(SpanKind::from_meta("producer"), SpanKind::Producer); + assert_eq!(SpanKind::from_meta("consumer"), SpanKind::Consumer); + assert_eq!(SpanKind::from_meta("internal"), SpanKind::Internal); + assert_eq!(SpanKind::from_meta(""), SpanKind::Internal); + assert_eq!(SpanKind::from_meta("anything-else"), SpanKind::Internal); + } + + #[test] + fn span_kind_repr_matches_otel_spec() { + assert_eq!(SpanKind::Internal as u32, 1); + assert_eq!(SpanKind::Server as u32, 2); + assert_eq!(SpanKind::Client as u32, 3); + assert_eq!(SpanKind::Producer as u32, 4); + assert_eq!(SpanKind::Consumer as u32, 5); + } + + #[test] + fn span_default_has_zero_trace_id_and_internal_kind() { + let s = SpanBytes::default(); + assert_eq!(s.trace_id, [0u8; 16]); + assert_eq!(s.span_kind, SpanKind::Internal); + assert!(!s.error); + assert!(s.attributes.is_empty()); + } + + #[test] + fn attribute_value_clone_preserves_variants() { + let s = AttributeValueBytes::String(BytesString::from_static("v")); + assert_eq!(s.clone(), s); + let n = AttributeValueBytes::Int(42); + assert_eq!(n.clone(), n); + let list = AttributeValueBytes::List(vec![AttributeValueBytes::Bool(true)]); + assert_eq!(list.clone(), list); + } +} From 53ae7735c6eae6618bd0c818bdfc453218ddf5e6 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 26 May 2026 17:13:00 +0200 Subject: [PATCH 4/4] chore: align v1 doc comments with v04 style Tighten module-level docs, drop verbose design-doc references, and shorten per-item comments on the V1 data model and msgpack encoder to match the concise style used in v04. --- .../src/msgpack_encoder/v1/mod.rs | 16 +++--- .../src/msgpack_encoder/v1/span_v1.rs | 23 ++------- libdd-trace-utils/src/span/v1/mod.rs | 51 ++++++++----------- 3 files changed, 32 insertions(+), 58 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 9d14f8447c..2c24f400bc 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -423,10 +423,7 @@ pub fn to_encoded_byte_len]>>( counter.0 } -/// Encodes a [`TracerPayload`] (V1 canonical data model) as a V1 msgpack payload. -/// -/// This is the M3 encoder. The byte layout matches [`encode_payload`] (the M1 v0.4 → V1 -/// encoder) so equivalent inputs produce byte-identical outputs. +/// Encodes a [`TracerPayload`] (V1 data model) as a V1 msgpack payload. fn encode_payload_v1( writer: &mut W, payload: &TracerPayload, @@ -496,6 +493,7 @@ fn encode_payload_v1( Ok(()) } +/// Encodes one V1 chunk (a group of spans sharing a trace ID). fn encode_chunk_v1( writer: &mut W, chunk: &crate::span::v1::TraceChunk, @@ -544,12 +542,12 @@ fn encode_chunk_v1( Ok(()) } -/// Serializes a V1 [`TracerPayload`] into a `Vec` using the V1 msgpack format. +/// Serializes a [`TracerPayload`] into a `Vec` using the V1 msgpack format. pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec { to_vec_from_payload_with_capacity(payload, 0) } -/// Serializes a V1 [`TracerPayload`] into a `Vec` with a pre-allocated capacity. +/// Serializes a [`TracerPayload`] into a `Vec` with a pre-allocated capacity. pub fn to_vec_from_payload_with_capacity( payload: &TracerPayload, capacity: u32, @@ -561,10 +559,10 @@ pub fn to_vec_from_payload_with_capacity( buf.into_vec() } -/// Serializes a V1 [`TracerPayload`] into a caller-provided slice. +/// Serializes a [`TracerPayload`] into a caller-provided slice. /// /// # Errors -/// Returns a `ValueWriteError` if the underlying writer fails (e.g. buffer too small). +/// Returns a `ValueWriteError` if the underlying writer fails. pub fn write_payload_to_slice( slice: &mut &mut [u8], payload: &TracerPayload, @@ -572,7 +570,7 @@ pub fn write_payload_to_slice( encode_payload_v1(slice, payload) } -/// Returns the number of bytes the V1 payload for `payload` would occupy when encoded. +/// Returns the number of bytes `payload` would occupy when encoded. pub fn to_encoded_byte_len_from_payload(payload: &TracerPayload) -> u32 { let mut counter = super::CountLength(0); let _ = encode_payload_v1(&mut counter, payload); diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs index e7af11fd84..982342fc21 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -1,12 +1,9 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! V1 msgpack encoder that consumes the canonical [`crate::span::v1`] data model. +//! V1 msgpack encoder that consumes the [`crate::span::v1`] data model. //! -//! Mirrors the encoder in [`super::span_v04`] but takes pre-promoted fields directly from the -//! struct instead of extracting them from v0.4 meta. The byte layout is identical so payloads -//! produced from equivalent inputs by either encoder are byte-for-byte equal — see the -//! cross-validation tests in [`super::cross_validation_tests`]. +//! The byte layout matches [`super::span_v04`] so equivalent inputs produce byte-identical output. use crate::span::v1::{AttributeValue, Span, SpanEvent, SpanLink}; use crate::span::TraceData; @@ -20,9 +17,6 @@ use super::span_v04::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey}; use super::StringTable; /// Encodes a typed [`AttributeValue`] as `[type_uint8, value]`. -/// -/// Recursive: [`AttributeValue::List`] and [`AttributeValue::KeyValue`] contain nested -/// [`AttributeValue`]s that are encoded in the same `[type, value]` shape. pub(super) fn encode_attribute_value( writer: &mut W, value: &AttributeValue, @@ -68,9 +62,7 @@ pub(super) fn encode_attribute_value( Ok(()) } -/// Encodes a flat triplet attributes array: `[key, type_uint8, value, key, type_uint8, value, ...]`. -/// -/// The array length is `3 * map.len()` per the V1 wire format. +/// Encodes a flat triplet attributes array: `[key, type_uint8, value, ...]`. pub(super) fn encode_attributes_map( writer: &mut W, map: &std::collections::HashMap>, @@ -160,9 +152,6 @@ pub(super) fn encode_span_events( } /// Encodes a [`Span`] (V1 data model) into V1 msgpack. -/// -/// Field-write order matches [`super::span_v04::encode_span`] so equivalent inputs produce -/// byte-identical output across the two encoders. pub(super) fn encode_span( writer: &mut W, span: &Span, @@ -212,9 +201,6 @@ pub(super) fn encode_span( write_u64(writer, span.span_id)?; write_uint8(writer, SpanKey::Start as u8)?; - // V1 normalization is the producer's responsibility: a negative `start` is not expected in - // the canonical data model. Cast preserves bits — callers that need wall-clock substitution - // should perform it before constructing the v1::Span. write_u64(writer, span.start as u64)?; if is_parent { @@ -224,7 +210,6 @@ pub(super) fn encode_span( if has_duration { write_uint8(writer, SpanKey::Duration as u8)?; - // Same rationale as for `start`: V1 inputs are expected to be normalized. write_u64(writer, span.duration.max(0) as u64)?; } @@ -263,7 +248,7 @@ pub(super) fn encode_span( write_uint8(writer, SpanKey::Component as u8)?; table.write_interned(writer, span.component.borrow())?; } - // SpanKind is always emitted per OTEL spec (default = Internal = 1). + // SpanKind is always emitted (default = Internal). write_uint8(writer, SpanKey::Kind as u8)?; write_uint(writer, span.span_kind as u64)?; diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs index 993aab7b05..bef124ee0a 100644 --- a/libdd-trace-utils/src/span/v1/mod.rs +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -1,22 +1,11 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! Canonical internal representation of a V1 trace. -//! -//! See the design doc and `RFC: Efficient Trace Payload Protocol`. Compared to v0.4, V1: -//! - promotes `env`, `version`, `component`, and `span.kind` out of the meta map into dedicated -//! span fields; -//! - merges `meta`, `metrics`, and `meta_struct` into a single typed [`AttributeValue`] map; -//! - represents `error` as `bool` and `trace_id` as a 128-bit big-endian byte array carried at the -//! chunk level. - use crate::span::{BytesData, SliceData, TraceData}; use std::collections::HashMap; /// OpenTelemetry SpanKind values, encoded on the wire as a `uint32`. -/// -/// Unset / unknown kinds default to [`SpanKind::Internal`] to match the OTEL spec and the agent's -/// behavior in `pkg/trace/api/converter.go`. +/// Unset or unrecognized kinds default to [`SpanKind::Internal`]. #[repr(u32)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum SpanKind { @@ -29,11 +18,8 @@ pub enum SpanKind { } impl SpanKind { - /// Parses the legacy v0.4 `span.kind` meta string into a [`SpanKind`]. - /// - /// Unrecognized values map to [`SpanKind::Internal`] per OTEL semantics. This is the - /// infallible counterpart to [`FromStr::from_str`]: callers converting from v0.4 always have a - /// well-defined SpanKind, even if the upstream tag is missing or invalid. + /// Parses a v0.4 `span.kind` meta value into a [`SpanKind`]. + /// Unrecognized values map to [`SpanKind::Internal`]. pub fn from_meta(s: &str) -> Self { match s { "server" => SpanKind::Server, @@ -46,9 +32,7 @@ impl SpanKind { } /// Typed V1 attribute value. -/// -/// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. The byte layout on the wire is a -/// `(key, type_uint8, value)` triplet — see `msgpack_encoder::v1::span_v1`. +/// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. #[derive(Debug, PartialEq)] pub enum AttributeValue { String(T::Text), @@ -78,18 +62,24 @@ where } } -/// Canonical V1 span model. +/// The generic representation of a V1 span. /// -/// Generic over [`TraceData`] so the same type can be used with owned (`BytesData`) or borrowed -/// (`SliceData`) string buffers — matching the v0.4 [`crate::span::v04::Span`] pattern. +/// `T` is the type used to represent strings in the span, it can be either owned (e.g. +/// BytesString) or borrowed (e.g. &str). To define a generic function taking any `Span` you can +/// use the [`TraceData`] trait: +/// ``` +/// use libdd_trace_utils::span::{v1::Span, TraceData}; +/// fn foo(span: Span) { +/// let _ = span.attributes.get("foo"); +/// } +/// ``` #[derive(Debug, PartialEq, Default)] pub struct Span { pub service: T::Text, pub name: T::Text, pub resource: T::Text, pub r#type: T::Text, - /// 128-bit trace ID stored as big-endian bytes. Wire-level trace ID lives at the chunk; the - /// per-span copy lets callers route a span to its chunk without scanning siblings. + /// 128-bit trace ID stored as big-endian bytes. pub trace_id: [u8; 16], pub span_id: u64, pub parent_id: u64, @@ -133,7 +123,8 @@ where } } -/// V1 span link. The 128-bit linked trace ID is stored in big-endian bytes. +/// The generic representation of a V1 span link. +/// `T` is the type used to represent strings in the span link. #[derive(Debug, PartialEq, Default)] pub struct SpanLink { pub trace_id: [u8; 16], @@ -159,7 +150,8 @@ where } } -/// V1 span event. +/// The generic representation of a V1 span event. +/// `T` is the type used to represent strings in the span event. #[derive(Debug, PartialEq, Default)] pub struct SpanEvent { pub time_unix_nano: u64, @@ -181,8 +173,7 @@ where } } -/// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata -/// promoted out of span meta (priority, origin, sampling mechanism). +/// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata. #[derive(Debug, PartialEq, Default)] pub struct TraceChunk { pub trace_id: [u8; 16], @@ -212,7 +203,7 @@ where } } -/// A V1 tracer payload: tracer-level metadata and the list of trace chunks it carries. +/// A V1 tracer payload: tracer-level metadata and the trace chunks it carries. #[derive(Debug, PartialEq, Default)] pub struct TracerPayload { pub language_name: T::Text,