diff --git a/.gitignore b/.gitignore index 5a4edd14ce..0b20ce159e 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ examples/cxx/exporter_manager.exe examples/cxx/profiling examples/cxx/profiling.exe profile.pprof +*.snap.new diff --git a/Cargo.lock b/Cargo.lock index e8ac24a225..3729daeeb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -999,6 +999,17 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "console-api" version = "0.9.0" @@ -1734,6 +1745,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -2691,6 +2708,21 @@ dependencies = [ "serde_core", ] +[[package]] +name = "insta" +version = "1.47.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4a6248eb93a4401ed2f37dfe8ea592d3cf05b7cf4f8efa867b6895af7e094e" +dependencies = [ + "console", + "once_cell", + "pest", + "pest_derive", + "serde", + "similar", + "tempfile", +] + [[package]] name = "io-lifetimes" version = "1.0.11" @@ -2997,6 +3029,7 @@ dependencies = [ "http", "http-body-util", "httpmock", + "insta", "libdd-capabilities", "libdd-capabilities-impl", "libdd-common", @@ -3006,6 +3039,7 @@ dependencies = [ "libdd-shared-runtime", "libdd-telemetry", "libdd-tinybytes", + "libdd-trace-normalization", "libdd-trace-obfuscation", "libdd-trace-protobuf", "libdd-trace-stats", @@ -4106,6 +4140,49 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "petgraph" version = "0.8.3" @@ -6196,6 +6273,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unarray" version = "0.1.4" diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 16d8a34b9e..bf70bdcc80 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -85,6 +85,7 @@ colorchoice,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The colorchoic colored,https://github.com/mackwic/colored,MPL-2.0,Thomas Wickham combine,https://github.com/Marwes/combine,MIT,Markus Westerlind concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT,"Stjepan Glavina , Taiki Endo , John Nunley " +console,https://github.com/console-rs/console,MIT,The console Authors console-api,https://github.com/tokio-rs/console,MIT,"Eliza Weisman , Tokio Contributors " console-subscriber,https://github.com/tokio-rs/console,MIT,"Eliza Weisman , Tokio Contributors " const_format,https://github.com/rodrimati1992/const_format_crates,Zlib,rodrimati1992 @@ -125,6 +126,7 @@ dispatch2,https://github.com/madsmtm/objc2,Zlib OR Apache-2.0 OR MIT,"Mads Marqu displaydoc,https://github.com/yaahc/displaydoc,MIT OR Apache-2.0,Jane Lusby dyn-clone,https://github.com/dtolnay/dyn-clone,MIT OR Apache-2.0,David Tolnay either,https://github.com/rayon-rs/either,MIT OR Apache-2.0,bluss +encode_unicode,https://github.com/tormol/encode_unicode,Apache-2.0 OR MIT,Torbjørn Birch Moltu encoding_rs,https://github.com/hsivonen/encoding_rs,(Apache-2.0 OR MIT) AND BSD-3-Clause,Henri Sivonen enum-as-inner,https://github.com/bluejekyll/enum-as-inner,MIT OR Apache-2.0,Benjamin Fry equivalent,https://github.com/cuviper/equivalent,Apache-2.0 OR MIT,The equivalent Authors @@ -282,6 +284,10 @@ parking_lot_core,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanie paste,https://github.com/dtolnay/paste,MIT OR Apache-2.0,David Tolnay path-tree,https://github.com/viz-rs/path-tree,MIT OR Apache-2.0,Fangdun Tsai percent-encoding,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers +pest,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice +pest_derive,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice +pest_generator,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice +pest_meta,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice petgraph,https://github.com/petgraph/petgraph,MIT OR Apache-2.0,"bluss, mitchmindtree" pico-args,https://github.com/RazrFalcon/pico-args,MIT,Yevhenii Reizner pin-project,https://github.com/taiki-e/pin-project,Apache-2.0 OR MIT,The pin-project Authors @@ -465,6 +471,7 @@ try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur typeid,https://github.com/dtolnay/typeid,MIT OR Apache-2.0,David Tolnay typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg , Andre Bogus " +ucd-trie,https://github.com/BurntSushi/ucd-generate,MIT OR Apache-2.0,Andrew Gallant unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors unicase,https://github.com/seanmonstar/unicase,MIT OR Apache-2.0,Sean McArthur unicode-ident,https://github.com/dtolnay/unicode-ident,(MIT OR Apache-2.0) AND Unicode-DFS-2016,David Tolnay diff --git a/libdd-common/src/regex_engine.rs b/libdd-common/src/regex_engine.rs index f3674f6e12..c5fb7d7973 100644 --- a/libdd-common/src/regex_engine.rs +++ b/libdd-common/src/regex_engine.rs @@ -13,7 +13,7 @@ //! regexes requiring Unicode character class support. #[cfg(all(feature = "regex-lite", not(feature = "require-regex-full")))] -pub use regex_lite::{escape, Captures, Regex, RegexBuilder, Replacer}; +pub use regex_lite::{escape, Captures, Error, Regex, RegexBuilder, Replacer}; #[cfg(not(all(feature = "regex-lite", not(feature = "require-regex-full"))))] -pub use regex::{escape, Captures, Regex, RegexBuilder, Replacer}; +pub use regex::{escape, Captures, Error, Regex, RegexBuilder, Replacer}; diff --git a/libdd-data-pipeline/Cargo.toml b/libdd-data-pipeline/Cargo.toml index 3356f41d65..f765f659ba 100644 --- a/libdd-data-pipeline/Cargo.toml +++ b/libdd-data-pipeline/Cargo.toml @@ -36,6 +36,7 @@ libdd-common = { version = "4.1.0", path = "../libdd-common", default-features = libdd-shared-runtime = { version = "1.0.0", path = "../libdd-shared-runtime", default-features = false } libdd-telemetry = { version = "5.0.0", path = "../libdd-telemetry", default-features = false, optional = true} libdd-trace-protobuf = { version = "3.0.2", path = "../libdd-trace-protobuf" } +libdd-trace-normalization = { version = "2.0.0", path = "../libdd-trace-normalization" } libdd-trace-stats = { version = "4.0.0", path = "../libdd-trace-stats", default-features = false } libdd-trace-utils = { version = "5.0.0", path = "../libdd-trace-utils", default-features = false } libdd-trace-obfuscation = { version = "3.1.0", path = "../libdd-trace-obfuscation", default-features = false, optional = true } @@ -80,6 +81,7 @@ tokio = { version = "1.23", features = [ "time", "test-util", ], default-features = false } +insta = { version = "1.47.2", features = ["json", "redactions"] } duplicate = "2.0.1" [features] diff --git a/libdd-data-pipeline/src/agent_info/schema.rs b/libdd-data-pipeline/src/agent_info/schema.rs index 7b7bfc4e3a..0b70b77164 100644 --- a/libdd-data-pipeline/src/agent_info/schema.rs +++ b/libdd-data-pipeline/src/agent_info/schema.rs @@ -40,20 +40,25 @@ pub struct AgentInfoStruct { /// Container tags hash from HTTP response header pub container_tags_hash: Option, /// Exact-match tag filters applied before stats computation (root span only). - pub filter_tags: Option, + #[serde(default)] + pub filter_tags: FilterTagsConfig, /// Regex-match tag filters applied before stats computation (root span only). - pub filter_tags_regex: Option, + #[serde(default)] + pub filter_tags_regex: FilterTagsConfig, /// Regex patterns for root-span resource names; matching traces are excluded from stats. - pub ignore_resources: Option>, + #[serde(default)] + pub ignore_resources: Vec, } /// Require/reject lists for tag-based trace filters exposed by the agent /info endpoint. #[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)] pub struct FilterTagsConfig { /// All listed filters must match at least one root-span tag for the trace to be accepted. - pub require: Option>, + #[serde(default)] + pub require: Vec, /// If any listed filter matches a root-span tag the trace is rejected. - pub reject: Option>, + #[serde(default)] + pub reject: Vec, } #[allow(missing_docs)] diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index c9430eadf2..1f8e100fb8 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -1,6 +1,7 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::agent_info::schema::FilterTagsConfig; use crate::agent_info::AgentInfoFetcher; use crate::otlp::config::{OtlpProtocol, DEFAULT_OTLP_TIMEOUT}; use crate::otlp::OtlpTraceConfig; @@ -8,6 +9,7 @@ use crate::otlp::OtlpTraceConfig; use crate::telemetry::TelemetryClientBuilder; use crate::trace_exporter::agent_response::AgentResponsePayloadVersion; use crate::trace_exporter::error::BuilderErrorKind; +use crate::trace_exporter::trace_filter::TraceFilterer; #[cfg(all(not(target_arch = "wasm32"), feature = "telemetry"))] use crate::trace_exporter::TelemetryConfig; #[cfg(not(target_arch = "wasm32"))] @@ -65,6 +67,9 @@ pub struct TraceExporterBuilder { connection_timeout: Option, otlp_endpoint: Option, otlp_headers: Vec<(String, String)>, + filter_tags: FilterTagsConfig, + filter_tags_regex: FilterTagsConfig, + ignore_resources: Vec, } impl TraceExporterBuilder { @@ -286,6 +291,24 @@ impl TraceExporterBuilder { self } + // TODO: doc + pub fn set_filter_tags(&mut self, filter_tags: FilterTagsConfig) -> &mut Self { + self.filter_tags = filter_tags; + self + } + + // TODO: doc + pub fn set_filter_tags_regex(&mut self, filter_tags_regex: FilterTagsConfig) -> &mut Self { + self.filter_tags_regex = filter_tags_regex; + self + } + + // TODO: doc + pub fn set_ignore_resources(&mut self, ignore_resources: Vec) -> &mut Self { + self.ignore_resources = ignore_resources; + self + } + #[allow(missing_docs)] pub fn build( self, @@ -495,6 +518,11 @@ impl TraceExporterBuilder { .agent_rates_payload_version_enabled .then(AgentResponsePayloadVersion::new), otlp_config, + trace_filterer: TraceFilterer::new( + &self.filter_tags, + &self.filter_tags_regex, + &self.ignore_resources, + ), }) } diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 7e3dd1c951..bf6a30be4b 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -5,6 +5,7 @@ pub mod builder; pub mod error; pub mod metrics; pub mod stats; +mod trace_filter; mod trace_serializer; // Re-export the builder @@ -201,6 +202,7 @@ pub struct TraceExporter, /// When set, traces are exported via OTLP HTTP/JSON instead of the Datadog agent. otlp_config: Option, + trace_filterer: trace_filter::TraceFilterer, } impl TraceExporter { @@ -347,6 +349,12 @@ impl Tra fn check_agent_info(&self) { if let Some(agent_info) = agent_info::get_agent_info() { if self.has_agent_info_state_changed(&agent_info) { + // FIXME: trace_filterer should only be enabled when CSS is on. (why ?) + self.trace_filterer.update_conf( + &agent_info.info.filter_tags, + &agent_info.info.filter_tags_regex, + &agent_info.info.ignore_resources, + ); match &**self.client_side_stats.status.load() { StatsComputationStatus::Disabled => {} StatsComputationStatus::DisabledByAgent { .. } => { @@ -574,6 +582,10 @@ impl Tra mut traces: Vec>>, ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); + // FIXME: when client_computed_top_level is true, looking twice for the root span here is + // inefficient and just below in process_traces_for_stats. + // Also, only do it when css is on + self.trace_filterer.filter_traces(&mut traces); // Process stats computation and drop non-sampled (p0) chunks. // This must run before the OTLP path so that unsampled spans are not exported. @@ -1854,10 +1866,14 @@ mod tests { #[cfg(test)] mod single_threaded_tests { + use std::collections::HashMap; + use std::sync::Mutex; + use super::*; use crate::agent_info; use httpmock::prelude::*; use libdd_capabilities_impl::NativeCapabilities; + use libdd_trace_protobuf::pb::ClientStatsPayload; use libdd_trace_utils::msgpack_encoder; use libdd_trace_utils::span::v04::SpanBytes; @@ -2156,4 +2172,185 @@ mod single_threaded_tests { "obfuscation must activate when opted in and agent supports" ); } + + #[cfg_attr(miri, ignore)] + #[test] + fn test_trace_filters_snapshot() { + // Clear the agent info cache to ensure test isolation + agent_info::clear_cache_for_test(); + + let server = MockServer::start(); + let captured_stats = Arc::new(Mutex::new(Vec::new())); + + let captured_stats_in = captured_stats.clone(); + + let mock_traces = server.mock(|when, then| { + when.method(POST) + .header("Content-type", "application/msgpack") + .path("/v0.4/traces"); + then.status(200).body(""); + }); + + let mock_stats = server.mock(|when, then| { + when.method(POST) + .header("Content-type", "application/msgpack") + .path("/v0.6/stats") + .is_true(move |req| { + captured_stats_in.lock().unwrap().push(req.body_vec()); + true + }); + then.status(200).body(""); + }); + + let _mock_info = server.mock(|when, then| { + when.method(GET).path("/info"); + then.status(200) + .header("content-type", "application/json") + .header("datadog-agent-state", "1") + .body( + r#"{ + "version":"1", + "client_drop_p0s":true, + "endpoints":["/v0.4/traces","/v0.6/stats"], + "filter_tags": {"reject": ["my_ignore_tag"], "require": ["my_require_tag:true"]}, + "filter_tags_regex": {"reject": ["my_regex_ignore_tag:.*true.*"]}, + "ignore_resources": [".*IGNORED.*"] + }"#, + ); + }); + + let runtime = Arc::new(SharedRuntime::new().unwrap()); + + let mut builder = TraceExporter::::builder(); + builder + .set_url(&server.url("/")) + .set_service("test") + .set_env("staging") + .set_tracer_version("v0.1") + .set_language("nodejs") + .set_language_version("1.0") + .set_language_interpreter("v8") + .set_input_format(TraceExporterInputFormat::V04) + .set_output_format(TraceExporterOutputFormat::V04) + .set_shared_runtime(runtime.clone()) + .enable_stats(Duration::from_secs(10)); + let exporter = builder.build::().unwrap(); + + // Wait for the info fetcher to get the config + while agent_info::get_agent_info().is_none() { + std::thread::sleep(Duration::from_millis(100)); + } + + let result = exporter.send( + msgpack_encoder::v04::to_vec(&[ + vec![SpanBytes { + duration: 10, + resource: "test".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), + ..Default::default() + }], + // This one gets filtered out because it matches an ignore_resources pattern + vec![SpanBytes { + duration: 10, + resource: "test IGNORED resource test".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), + ..Default::default() + }], + // This one gets filtered out because one of its tag matches a reject filter_tag + vec![SpanBytes { + duration: 10, + resource: "test ignored because of reject filter_tag".into(), + meta: HashMap::from_iter([ + ("my_ignore_tag".into(), "".into()), + ("my_require_tag".into(), "true".into()), + ]), + ..Default::default() + }], + // This one gets filtered out because one of its tag matches a reject + // regex_filter_tag + vec![SpanBytes { + duration: 10, + resource: "test ignored because of reject regex_filter_tag".into(), + meta: HashMap::from_iter([ + ( + "my_regex_ignore_tag".into(), + "something-true-something".into(), + ), + ("my_require_tag".into(), "true".into()), + ]), + ..Default::default() + }], + // This one gets filtered out because it doesn't have my_require_tag:true + vec![SpanBytes { + duration: 10, + resource: "test ignored because missing a required filter_tag".into(), + meta: HashMap::from_iter([("a_useless_tag".into(), "true".into())]), + ..Default::default() + }], + // This one gets filtered out because it doesn't have my_require_tag:true + vec![SpanBytes { + duration: 10, + resource: "test ignored because wrong value on filter_tag".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "false".into())]), + ..Default::default() + }], + vec![SpanBytes { + duration: 10, + resource: "test2".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), + ..Default::default() + }], + ]) + .as_ref(), + ); + assert!(result.is_err()); + + // Wait for the stats worker to be active before shutting down to avoid potential flaky + // tests on CI where we shutdown before the stats worker had time to start + let start_time = std::time::Instant::now(); + while !exporter.is_stats_worker_active() { + if start_time.elapsed() > Duration::from_secs(10) { + panic!("Timeout waiting for stats worker to become active"); + } + std::thread::sleep(Duration::from_millis(10)); + } + + runtime.shutdown(None).unwrap(); + + // Wait for the mock server to process the stats + for _ in 0..1000 { + if mock_traces.calls() > 0 && mock_stats.calls() > 0 { + break; + } else { + std::thread::sleep(Duration::from_millis(10)); + } + } + + mock_traces.assert(); + mock_stats.assert(); + + // Verify snapshots matches + let mut captured_stats: Vec = captured_stats + .lock() + .unwrap() + .iter() + .map(|payload| rmp_serde::from_slice(payload).unwrap()) + .collect(); + // Sort for deterministic snapshot output + for payload in &mut captured_stats { + for bucket in &mut payload.stats { + bucket.stats.sort_by(|a, b| a.resource.cmp(&b.resource)); + } + } + insta::assert_json_snapshot!( + "trace_filters", + serde_json::to_value(&captured_stats).unwrap(), + { + "[].RuntimeID" => "[id]", + "[].Stats[].Start" => "[timestamp]", + "[].Stats[].Stats[].OkSummary" => "[sketch]", + "[].Stats[].Stats[].ErrorSummary" => "[sketch]", + } + ); + } } diff --git a/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap new file mode 100644 index 0000000000..cbe5725103 --- /dev/null +++ b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap @@ -0,0 +1,150 @@ +--- +source: libdd-data-pipeline/src/trace_exporter/mod.rs +expression: "serde_json::to_value(&captured_stats).unwrap()" +--- +[ + { + "Hostname": "", + "Env": "staging", + "Version": "", + "Stats": [ + { + "Start": "[timestamp]", + "Duration": 10000000000, + "Stats": [ + { + "Service": "", + "Name": "", + "Resource": "test", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + }, + { + "Service": "", + "Name": "", + "Resource": "test2", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + } + ], + "AgentTimeShift": 0 + } + ], + "Lang": "", + "TracerVersion": "", + "RuntimeID": "[id]", + "Sequence": 0, + "AgentAggregation": "", + "Service": "test", + "ContainerID": "", + "Tags": [], + "GitCommitSha": "", + "ImageTag": "", + "ProcessTagsHash": 0, + "ProcessTags": "" + }, + { + "Hostname": "", + "Env": "staging", + "Version": "", + "Stats": [ + { + "Start": "[timestamp]", + "Duration": 10000000000, + "Stats": [ + { + "Service": "", + "Name": "", + "Resource": "test", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + }, + { + "Service": "", + "Name": "", + "Resource": "test2", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + } + ], + "AgentTimeShift": 0 + } + ], + "Lang": "", + "TracerVersion": "", + "RuntimeID": "[id]", + "Sequence": 0, + "AgentAggregation": "", + "Service": "test", + "ContainerID": "", + "Tags": [], + "GitCommitSha": "", + "ImageTag": "", + "ProcessTagsHash": 0, + "ProcessTags": "" + } +] diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs new file mode 100644 index 0000000000..c8aff018cb --- /dev/null +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -0,0 +1,699 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 +//! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, +//! ignore_resources as published by the agent's /info endpoint). +use std::{borrow::Borrow as _, collections::HashMap, sync::Arc}; + +use libdd_common::regex_engine; +use libdd_trace_stats::span_concentrator::StatSpan; +use libdd_trace_utils::span::trace_utils::get_root_span_index_v4; +use tracing::{debug, error}; + +trait TagFilter { + /// Returns true if the given tag value matches the Filterer. + fn matches_tag_value(&self, value: &str) -> bool; + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &'a self, + meta: &'a HashMap, + ) -> Option<(&'a str, &'a T)>; +} + +#[derive(Debug)] +struct TagStringFilter { + key: String, + value: Option, +} + +#[derive(Debug)] +struct TagRegexFilter { + key: String, + value: Option, +} + +#[derive(Debug)] +// Slowest kind of filter where the key field is also a regex +struct TagRegexKeyFilter { + key: regex_engine::Regex, + value: Option, +} + +/// Parsed config +#[derive(Debug)] +struct TraceFilteredConf { + reject: Vec, + reject_regex: Vec, + reject_key_regex: Vec, + + require: Vec, + require_regex: Vec, + require_key_regex: Vec, + + ignore_resources: Vec, +} + +#[derive(Debug)] +pub struct TraceFilterer { + conf: arc_swap::ArcSwap, +} + +impl TagStringFilter { + fn from_str(tag: &str) -> Self { + if let Some((key, value)) = tag.split_once(":") { + TagStringFilter { + key: key.to_owned(), + value: Some(value.to_owned()), + } + } else { + TagStringFilter { + key: tag.to_owned(), + value: None, + } + } + } +} + +impl TagFilter for TagStringFilter { + fn matches_tag_value(&self, value: &str) -> bool { + match &self.value { + None => true, // No value requirement => Any value is a match + Some(required_value) => value == required_value, + } + } + + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &'a self, + meta: &'a HashMap, + ) -> std::option::Option<(&'a str, &'a T)> { + Some((self.key.as_ref(), meta.get(&self.key)?)) + } +} + +impl TagFilter for TagRegexFilter { + fn matches_tag_value(&self, value: &str) -> bool { + match &self.value { + None => true, // No value requirement => Any value is a match + Some(pattern) => pattern.is_match(value), + } + } + + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &'a self, + meta: &'a HashMap, + ) -> std::option::Option<(&'a str, &'a T)> { + Some((self.key.as_ref(), meta.get(&self.key)?)) + } +} + +impl TagFilter for TagRegexKeyFilter { + fn matches_tag_value(&self, value: &str) -> bool { + match &self.value { + None => true, // No value requirement => Any value is a match + Some(pattern) => pattern.is_match(value), + } + } + + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &self, + meta: &'a HashMap, + ) -> std::option::Option<(&'a str, &'a T)> { + meta.iter() + .find(|&(key, _)| self.key.is_match(key.borrow())) + .map(|(key, value)| (key.borrow(), value)) + } +} + +/// Compile a regex anchored to the full string. +fn compile_anchored(pattern: &str) -> Result { + regex_engine::Regex::new(&format!("^(?:{pattern})$")) +} + +/// Returns `true` when `key` contains no regex metacharacters and can be used for a direct +/// O(1) lookup. `.` is intentionally treated as a literal (not a wildcard) in key patterns. +fn is_literal_key(key: &str) -> bool { + !key.contains([ + '*', '+', '?', '[', ']', '(', ')', '{', '}', '^', '$', ',', '\\', + ]) +} + +impl TraceFilteredConf { + /// Compile all `filter_tags_regex` entries, splitting into literal-key (fast) and + /// regex-key (slow) lists based on whether the key portion contains metacharacters. + fn compile_regex_filters(filters: &[String]) -> (Vec, Vec) { + let mut tag_regex_filters = Vec::new(); + let mut tag_regex_key_filters = Vec::new(); + for filter in filters { + let (key, value) = match filter.split_once(":") { + Some((key, value)) => (key, Some(value)), + None => (filter.as_ref(), None), + }; + + let value = match value { + Some(value) => match compile_anchored(value) { + Ok(regex) => Some(regex), + Err(err) => { + error!( + ?filter, + ?err, + "Invalid regex pattern in tag filter's value, skipping it" + ); + // FIXME: dd-trace-php considers that if the value pattern is bad, we still + // keep the filter by only matching on the key. I find it more intuitive to + // drop the filter altogether + continue; + } + }, + None => None, + }; + + if is_literal_key(key) { + tag_regex_filters.push(TagRegexFilter { + key: key.to_owned(), + value, + }); + } else { + match compile_anchored(key) { + Ok(key) => tag_regex_key_filters.push(TagRegexKeyFilter { key, value }), + Err(err) => { + error!( + ?filter, + ?err, + "Invalid regex pattern in tag filter's key, skipping it" + ); + continue; + } + } + } + } + + (tag_regex_filters, tag_regex_key_filters) + } + + fn parse( + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ignore_resources: &[String], + ) -> Self { + let (require_regex, require_key_regex) = + Self::compile_regex_filters(&filter_tags_regex.require); + let (reject_regex, reject_key_regex) = + Self::compile_regex_filters(&filter_tags_regex.reject); + + let reject = filter_tags + .reject + .iter() + .map(|tag| TagStringFilter::from_str(tag)) + .collect(); + let require = filter_tags + .require + .iter() + .map(|tag| TagStringFilter::from_str(tag)) + .collect(); + let ignore_resources = ignore_resources + .iter() + .filter_map(|regex| { + compile_anchored(regex) + .inspect_err(|err| { + error!( + ?regex, + ?err, + "Invalid regex pattern in ignore resources filter, skipping it" + ) + }) + .ok() + }) + .collect(); + TraceFilteredConf { + reject, + require, + reject_regex, + require_regex, + reject_key_regex, + require_key_regex, + ignore_resources, + } + } +} + +impl TraceFilterer { + pub fn new( + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ignore_resources: &[String], + ) -> Self { + let conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex, ignore_resources); + Self { + conf: arc_swap::ArcSwap::from_pointee(conf), + } + } + + pub fn update_conf( + &self, + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ignore_resources: &[String], + ) { + let new_conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex, ignore_resources); + self.conf.swap(Arc::new(new_conf)); + } + + pub fn filter_traces( + &self, + traces: &mut Vec>>, + ) { + let conf = self.conf.load(); + traces.retain(|trace| { + let Ok(root_span_index) = get_root_span_index_v4(trace) else { + // FIXME: in this case it's a distributed trace ? Maybe we should remove the debug + // log in get_root_span_index_v4 then + return true; + }; + let root_span = &trace[root_span_index]; + let should_drop = Self::should_drop(&conf, root_span); + if should_drop { + debug!("Trace rejected as it fails to meet tag requirements. root: %v"); + } + !should_drop + }); + } + + /// Checks if the trace with root span `root_span` should be dropped based on filter + /// configuration. + /// + /// Applies a subset of trace normalization logic from `libdd-trace-normalization` before + /// checking. + fn should_drop( + conf: &TraceFilteredConf, + root_span: &libdd_trace_utils::span::v04::Span, + ) -> bool { + if conf + .reject + .iter() + .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if conf + .reject_regex + .iter() + .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if conf + .reject_key_regex + .iter() + .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if !conf + .require + .iter() + .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if !conf + .require_regex + .iter() + .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if !conf + .require_key_regex + .iter() + .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if !conf.ignore_resources.is_empty() { + let span_resource = root_span.resource(); + // Normalization + let span_resource = if span_resource.is_empty() { + let span_name = root_span.name(); + debug!( + ?span_name, + "Trace filter fixing malformed trace. Resource is empty so using name instead" + ); + span_name + } else { + span_resource + }; + + if conf + .ignore_resources + .iter() + .any(|resource_pattern| resource_pattern.is_match(span_resource)) + { + return true; + } + } + + false + } + + fn check_tag_filter_with_normalization( + filter: &impl TagFilter, + root_span: &libdd_trace_utils::span::v04::Span, + ) -> bool { + let Some((key, value)) = filter.find_tag(&root_span.meta) else { + return false; + }; + let value = value.borrow(); + match key { + "env" => { + let normalized_value = + libdd_trace_normalization::normalize_utils::normalize_tag_cloned(value); + filter.matches_tag_value(&normalized_value) + } + "http.status_code" => { + if !libdd_trace_normalization::normalizer::is_valid_http_status_code(value) { + debug!(?value,"trace filter on http.status_code ignored because root span's `http.status_code` is invalid"); + return false; + } + filter.matches_tag_value(value) + } + _ => filter.matches_tag_value(value), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent_info::schema::FilterTagsConfig; + use libdd_trace_utils::span::v04::SpanBytes; + use std::collections::HashMap; + + // ---- helpers ---- + + fn ftc(require: &[&str], reject: &[&str]) -> FilterTagsConfig { + FilterTagsConfig { + require: require.iter().map(|s| s.to_string()).collect(), + reject: reject.iter().map(|s| s.to_string()).collect(), + } + } + + fn no_tags() -> FilterTagsConfig { + FilterTagsConfig::default() + } + + fn span_with(resource: &'static str, meta: &[(&'static str, &'static str)]) -> SpanBytes { + SpanBytes { + service: "svc".into(), + name: "op".into(), + resource: resource.into(), + span_id: 1, + trace_id: 1, + parent_id: 0, + meta: meta + .iter() + .map(|(k, v)| ((*k).into(), (*v).into())) + .collect::>(), + ..Default::default() + } + } + + fn one_trace(s: SpanBytes) -> Vec> { + vec![vec![s]] + } + + fn reject_str(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&ftc(&[], tags), &no_tags(), &[]) + } + + fn require_str(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&ftc(tags, &[]), &no_tags(), &[]) + } + + fn reject_regex(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&no_tags(), &ftc(&[], tags), &[]) + } + + fn require_regex(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&no_tags(), &ftc(tags, &[]), &[]) + } + + fn ignore_resources(patterns: &[&str]) -> TraceFilterer { + let pats: Vec = patterns.iter().map(|s| s.to_string()).collect(); + TraceFilterer::new(&no_tags(), &no_tags(), &pats) + } + + // ---- reject (TagStringFilter) ---- + + #[test] + fn reject_string_exact_match_drops() { + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn reject_string_wrong_value_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "staging")])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn reject_string_missing_tag_keeps() { + let mut traces = one_trace(span_with("r", &[])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn reject_string_key_only_matches_any_value() { + // A key-only filter (no `:value` part) matches regardless of the tag's value. + let mut traces = one_trace(span_with("r", &[("env", "anything")])); + reject_str(&["env"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- reject_regex (TagRegexFilter – literal key, regex value) ---- + + #[test] + fn reject_regex_value_match_drops() { + let mut traces = one_trace(span_with("r", &[("env", "production")])); + reject_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn reject_regex_value_no_match_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "staging")])); + reject_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + // ---- reject_key_regex (TagRegexKeyFilter – regex key) ---- + // A key pattern containing `*` triggers the key-regex path. + + #[test] + fn reject_key_regex_key_and_value_match_drops() { + // "err.*" contains `*` → key is compiled as a regex; matches "error". + let mut traces = one_trace(span_with("r", &[("error", "timeout")])); + reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn reject_key_regex_wrong_value_keeps() { + let mut traces = one_trace(span_with("r", &[("error", "network")])); + reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn reject_key_regex_missing_key_keeps() { + let mut traces = one_trace(span_with("r", &[])); + reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + // ---- require (TagStringFilter) ---- + + #[test] + fn require_string_present_and_matching_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + require_str(&["env:prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn require_string_missing_tag_drops() { + let mut traces = one_trace(span_with("r", &[])); + require_str(&["env:prod"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn require_string_wrong_value_drops() { + let mut traces = one_trace(span_with("r", &[("env", "staging")])); + require_str(&["env:prod"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- require_regex (TagRegexFilter – literal key, regex value) ---- + + #[test] + fn require_regex_value_match_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "production")])); + require_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn require_regex_missing_drops() { + let mut traces = one_trace(span_with("r", &[])); + require_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- require_key_regex (TagRegexKeyFilter – regex key) ---- + + #[test] + fn require_key_regex_key_exists_keeps() { + // Key-only pattern → value: None → any tag value satisfies the requirement. + let mut traces = one_trace(span_with("r", &[("error", "any")])); + require_regex(&["err.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn require_key_regex_missing_key_drops() { + let mut traces = one_trace(span_with("r", &[])); + require_regex(&["err.*"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- ignore_resources ---- + + #[test] + fn ignore_resources_match_drops() { + let mut traces = one_trace(span_with("GET /health", &[])); + ignore_resources(&["GET /health"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn ignore_resources_no_match_keeps() { + let mut traces = one_trace(span_with("POST /data", &[])); + ignore_resources(&["GET /health"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn ignore_resources_empty_resource_falls_back_to_name() { + // When resource is empty the span's name field is used for matching. + // The helper sets name = "op", so ignore_resources("op") must drop it. + let mut traces = one_trace(span_with("", &[])); + ignore_resources(&["op"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- env tag normalization ---- + + #[test] + fn env_normalization_reject_matches_after_lowercase() { + // normalize_tag_cloned("PROD") == "prod"; the reject filter "env:prod" must fire. + let mut traces = one_trace(span_with("r", &[("env", "PROD")])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert!( + traces.is_empty(), + "env value should be normalized before matching" + ); + } + + #[test] + fn env_normalization_require_matches_normalized_value() { + // normalize_tag_cloned("Prod Env") == "prod_env" (uppercase + space → underscore). + let mut traces = one_trace(span_with("r", &[("env", "Prod Env")])); + require_str(&["env:prod_env"]).filter_traces(&mut traces); + assert_eq!( + traces.len(), + 1, + "normalized env should satisfy the require filter" + ); + } + + // ---- http.status_code special handling ---- + + #[test] + fn http_status_code_invalid_value_skips_reject_filter() { + // is_valid_http_status_code("abc") == false → check_tag_filter returns false + // → reject never fires → trace kept even though the raw value equals the filter. + let mut traces = one_trace(span_with("r", &[("http.status_code", "abc")])); + reject_str(&["http.status_code:abc"]).filter_traces(&mut traces); + assert_eq!( + traces.len(), + 1, + "invalid status code should not trigger the filter" + ); + } + + #[test] + fn http_status_code_valid_value_triggers_reject_filter() { + let mut traces = one_trace(span_with("r", &[("http.status_code", "500")])); + reject_str(&["http.status_code:500"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- update_conf ---- + + #[test] + fn update_conf_takes_effect() { + let f = TraceFilterer::new(&no_tags(), &no_tags(), &[]); + + // No filters: trace is kept. + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + + // Swap in a reject filter: same trace is now dropped. + f.update_conf(&ftc(&[], &["env:prod"]), &no_tags(), &[]); + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + f.filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- edge / misc ---- + + #[test] + fn multiple_traces_partial_rejection() { + let f = reject_str(&["env:prod"]); + let mut traces = vec![ + vec![span_with("r", &[("env", "prod")])], // dropped + vec![span_with("r", &[("env", "staging")])], // kept + ]; + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn no_filters_keeps_all_traces() { + let f = TraceFilterer::new(&no_tags(), &no_tags(), &[]); + let mut traces = vec![ + vec![span_with("r1", &[])], + vec![span_with("r2", &[("env", "prod")])], + ]; + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 2); + } + + #[test] + fn invalid_regex_in_filter_is_skipped_gracefully() { + // A bad regex pattern is silently discarded; no panic, trace is kept. + let f = reject_regex(&["env:[invalid"]); + let mut traces = one_trace(span_with("r", &[("env", "anything")])); + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } +} diff --git a/libdd-trace-normalization/src/normalize_utils.rs b/libdd-trace-normalization/src/normalize_utils.rs index b70093c817..fab5d43cae 100644 --- a/libdd-trace-normalization/src/normalize_utils.rs +++ b/libdd-trace-normalization/src/normalize_utils.rs @@ -83,6 +83,12 @@ pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) { } } +pub fn normalize_tag_cloned(tag: &str) -> String { + let mut tag = tag.to_owned(); + normalize_tag(&mut tag); + tag +} + pub fn normalize_tag(tag: &mut String) { // Since we know that we're only going to write valid utf8 we can work with the Vec directly let bytes = unsafe { tag.as_mut_vec() }; diff --git a/libdd-trace-normalization/src/normalizer.rs b/libdd-trace-normalization/src/normalizer.rs index 7450dad908..ee5790cbc6 100644 --- a/libdd-trace-normalization/src/normalizer.rs +++ b/libdd-trace-normalization/src/normalizer.rs @@ -35,7 +35,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { } if let Some(code) = s.meta.get("http.status_code") { - if !is_valid_status_code(code) { + if !is_valid_http_status_code(code) { s.meta.remove("http.status_code"); } }; @@ -43,7 +43,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { Ok(()) } -pub(crate) fn is_valid_status_code(sc: &str) -> bool { +pub fn is_valid_http_status_code(sc: &str) -> bool { if let Ok(code) = sc.parse::() { return (100..600).contains(&code); } @@ -476,11 +476,13 @@ mod tests { #[test] fn test_is_valid_status_code() { - assert!(normalizer::is_valid_status_code("100")); - assert!(normalizer::is_valid_status_code("599")); - assert!(!normalizer::is_valid_status_code("99")); - assert!(!normalizer::is_valid_status_code("600")); - assert!(!normalizer::is_valid_status_code("Invalid status code")); + assert!(normalizer::is_valid_http_status_code("100")); + assert!(normalizer::is_valid_http_status_code("599")); + assert!(!normalizer::is_valid_http_status_code("99")); + assert!(!normalizer::is_valid_http_status_code("600")); + assert!(!normalizer::is_valid_http_status_code( + "Invalid status code" + )); } #[test] diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index 8dd8a03b05..17910320f9 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -3,8 +3,10 @@ //! Trace-utils functionalities implementation for tinybytes based spans +use tracing::debug; + use super::{v04::Span, SpanText, TraceData}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; /// Span metric the mini agent must set for the backend to recognize top level span const TOP_LEVEL_KEY: &str = "_top_level"; @@ -60,6 +62,50 @@ where } } +// FIXME: duplicated with super::get_root_span_index +pub fn get_root_span_index_v4(trace: &[Span]) -> anyhow::Result +where + T: TraceData, +{ + if trace.is_empty() { + anyhow::bail!("Cannot find root span index in an empty trace."); + } + + // Do a first pass to find if we have an obvious root span (starting from the end) since some + // clients put the root span last. + for (i, span) in trace.iter().enumerate().rev() { + if span.parent_id == 0 { + return Ok(i); + } + } + + let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id).collect(); + + let mut root_span_id = None; + for (i, span) in trace.iter().enumerate() { + // If a span's parent is not in the trace, it is a root + if !span_ids.contains(&span.parent_id) { + if root_span_id.is_some() { + debug!( + trace_id = &trace[0].trace_id, + "trace has multiple root spans" + ); + } + root_span_id = Some(i); + } + } + Ok(match root_span_id { + Some(i) => i, + None => { + debug!( + trace_id = &trace[0].trace_id, + "Could not find the root span for trace" + ); + trace.len() - 1 + } + }) +} + /// Return true if the span has a top level key set pub fn has_top_level(span: &Span) -> bool { span.metrics diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index 3cd53b04d7..26bfafbfdd 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -381,10 +381,7 @@ pub fn get_root_span_index(trace: &[pb::Span]) -> anyhow::Result { } } - let mut span_ids: HashSet = HashSet::with_capacity(trace.len()); - for span in trace.iter() { - span_ids.insert(span.span_id); - } + let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id).collect(); let mut root_span_id = None; for (i, span) in trace.iter().enumerate() {