Skip to content

Commit f7999ce

Browse files
feat(profiling): add sample count to internal payload (#15177)
## Description This PR adds data to the `internal` payload we send to `libdatadog`. In this `internal` payload, we can push custom metrics (that are not exposed to customers) but that we can access for analytics. For the time being, I only added the number of Samples taken (one per thread) and Sampling Events (one for each time we run `for_each_interp`) for the current Profile. We may want to add more in the short term (e.g. number of times adaptive sampling was adjusted, history of adaptive sampling intervals, etc.) In order to implement this feature in a way that keeps clear semantics around locking the Profile object, I refactored the code not to `borrow` only the single `Profile` but instead both the `Profile` and the `ProfilerStats` (in which we store our metrics). To keep locking clear and explicit, locking returns a `ProfileBorrow` objects which allows to access both the `Profile` and the `ProfilerStats` and that is RAII-compatible (automatically unlocks when it goes out of scope). Note the PR does add some "lock contention" because we now need to take the Profile lock in two new places (one for each metric we increment) – it should be the same order of magnitude as what we already do though (in number of lock acquisitions). We plan to refactor Stack V2 in the short to avoid having to hold the Profile / ProfilerStats lock during the upload, by using double buffering or by releasing earlier (after the Profile data has been serialised). This should improve the performance and reduce lost Samples. The PR also adds an integration test, ensuring that the generated JSON string is correct (we can't guarantee exact numbers, but we can check the numbers we have are meaningful). **Note** I marked the PR as `no-changelog` as this feature isn't public/visible by customers. Open questions: * What other metrics do we want to add? This is an example of querying through the Events UI. <img width="886" height="604" alt="image" src="https://github.com/user-attachments/assets/53c3eae0-d29a-4446-9f1b-72ac29a75d60" />
1 parent 73f2611 commit f7999ce

File tree

19 files changed

+372
-64
lines changed

19 files changed

+372
-64
lines changed

ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ add_library(
5454
src/code_provenance_interface.cpp
5555
src/ddup_interface.cpp
5656
src/profile.cpp
57+
src/profile_borrow.cpp
58+
src/profiler_stats.cpp
5759
src/sample.cpp
5860
src/sample_manager.cpp
5961
src/static_sample_pool.cpp

ddtrace/internal/datadog/profiling/dd_wrapper/include/ddup_interface.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
#pragma once
22

3-
#include <stddef.h>
4-
#include <stdint.h>
3+
#include <cstdint>
54
#include <string_view>
65
#include <unordered_map>
76

87
// Forward decl of the return pointer
98
namespace Datadog {
109
class Sample;
11-
}
10+
} // namespace Datadog
1211

1312
#ifdef __cplusplus
1413
extern "C"
@@ -68,6 +67,10 @@ extern "C"
6867
int64_t line);
6968
void ddup_push_absolute_ns(Datadog::Sample* sample, int64_t timestamp_ns);
7069
void ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns);
70+
71+
void ddup_increment_sampling_event_count();
72+
void ddup_increment_sample_count();
73+
7174
void ddup_flush_sample(Datadog::Sample* sample);
7275
// Stack v2 specific flush, which reverses the locations
7376
void ddup_flush_sample_v2(Datadog::Sample* sample);

ddtrace/internal/datadog/profiling/dd_wrapper/include/profile.hpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
#pragma once
22

33
#include "constants.hpp"
4+
#include "profiler_stats.hpp"
45
#include "types.hpp"
56

67
#include <atomic>
7-
#include <memory>
88
#include <mutex>
9-
#include <string>
10-
#include <string_view>
119
#include <vector>
1210

1311
extern "C"
@@ -17,9 +15,13 @@ extern "C"
1715

1816
namespace Datadog {
1917

18+
class ProfileBorrow;
19+
2020
// Serves to collect individual samples, as well as lengthen the scope of string data
2121
class Profile
2222
{
23+
friend class ProfileBorrow;
24+
2325
private:
2426
// Serialization for static state
2527
// - string table
@@ -45,6 +47,12 @@ class Profile
4547
// cannot be used until it's initialized by libdatadog
4648
ddog_prof_Profile cur_profile{};
4749

50+
Datadog::ProfilerStats profiler_stats{};
51+
52+
// Internal access methods - not for direct use
53+
ddog_prof_Profile& profile_borrow_internal();
54+
void profile_release();
55+
4856
public:
4957
// State management
5058
void one_time_init(SampleType type, unsigned int _max_nframes);
@@ -53,8 +61,8 @@ class Profile
5361

5462
// Getters
5563
size_t get_sample_type_length();
56-
ddog_prof_Profile& profile_borrow();
57-
void profile_release();
64+
65+
ProfileBorrow borrow();
5866

5967
// constref getters
6068
const ValueIndex& val();
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#pragma once
2+
3+
#include "profile.hpp"
4+
5+
namespace Datadog {
6+
7+
// Forward declaration
8+
class Profile;
9+
10+
// RAII wrapper for borrowing both profile and stats under a single lock
11+
class ProfileBorrow
12+
{
13+
private:
14+
Profile* profile_ptr;
15+
16+
public:
17+
explicit ProfileBorrow(Profile& profile);
18+
~ProfileBorrow();
19+
20+
// Disable copy
21+
ProfileBorrow(const ProfileBorrow&) = delete;
22+
ProfileBorrow& operator=(const ProfileBorrow&) = delete;
23+
24+
// Enable move
25+
ProfileBorrow(ProfileBorrow&& other) noexcept;
26+
ProfileBorrow& operator=(ProfileBorrow&& other) noexcept;
27+
28+
// Accessors
29+
ddog_prof_Profile& profile();
30+
ProfilerStats& stats();
31+
};
32+
33+
} // namespace Datadog
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#pragma once
2+
3+
#include <cstddef>
4+
5+
#include <string>
6+
#include <string_view>
7+
8+
namespace Datadog {
9+
10+
/*
11+
ProfilerStats holds statistics around Profiling to be sent along
12+
with the actual Profiles.
13+
14+
None of its methods are thread-safe and it should typically used with
15+
a mutex to protect access to the data.
16+
*/
17+
class ProfilerStats
18+
{
19+
private:
20+
std::string internal_metadata_json;
21+
22+
// Number of samples collected (one per thread)
23+
size_t sample_count = 0;
24+
25+
// Number of sampling events (one per collection cycle)
26+
size_t sampling_event_count = 0;
27+
28+
public:
29+
ProfilerStats() = default;
30+
~ProfilerStats() = default;
31+
32+
void increment_sample_count(size_t k_sample_count = 1);
33+
size_t get_sample_count();
34+
35+
void increment_sampling_event_count(size_t k_sampling_event_count = 1);
36+
size_t get_sampling_event_count();
37+
38+
// Returns a JSON string containing relevant Profiler Stats to be included
39+
// in the libdatadog payload.
40+
// The function returned a string_view to a statically allocated string that
41+
// is updated every time the function is called.
42+
std::string_view get_internal_metadata_json();
43+
44+
void reset_state();
45+
};
46+
47+
} // namespace Datadog

ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "libdatadog_helpers.hpp"
44
#include "profile.hpp"
5+
#include "profile_borrow.hpp"
56
#include "types.hpp"
67

78
#include <string>
@@ -134,8 +135,7 @@ class Sample
134135
// Flushes the current buffer, clearing it
135136
bool flush_sample(bool reverse_locations = false);
136137

137-
static ddog_prof_Profile& profile_borrow();
138-
static void profile_release();
138+
static ProfileBorrow profile_borrow();
139139
static void postfork_child();
140140
Sample(SampleType _type_mask, unsigned int _max_nframes);
141141

ddtrace/internal/datadog/profiling/dd_wrapper/include/uploader.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
#pragma once
22

3-
#include "sample.hpp"
4-
#include "types.hpp"
3+
#include "profiler_stats.hpp"
54

65
#include <atomic>
7-
#include <memory>
86
#include <mutex>
7+
#include <string>
98

109
extern "C"
1110
{
@@ -24,10 +23,10 @@ class Uploader
2423
std::string output_filename;
2524
ddog_prof_ProfileExporter ddog_exporter{ .inner = nullptr };
2625

27-
bool export_to_file(ddog_prof_EncodedProfile* encoded);
26+
bool export_to_file(ddog_prof_EncodedProfile* encoded, std::string_view internal_metadata_json);
2827

2928
public:
30-
bool upload(ddog_prof_Profile& profile);
29+
bool upload(ddog_prof_Profile& profile, Datadog::ProfilerStats& profiler_stats);
3130
static void cancel_inflight();
3231
static void lock();
3332
static void unlock();

ddtrace/internal/datadog/profiling/dd_wrapper/include/uploader_builder.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#pragma once
22

3+
#include "constants.hpp"
34
#include "uploader.hpp"
45

5-
#include <mutex>
66
#include <string>
77
#include <string_view>
88
#include <unordered_map>

ddtrace/internal/datadog/profiling/dd_wrapper/src/ddup_interface.cpp

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
#include "ddup_interface.hpp"
22

3-
#include "code_provenance.hpp"
43
#include "libdatadog_helpers.hpp"
5-
#include "profile.hpp"
4+
#include "profiler_stats.hpp"
65
#include "sample.hpp"
76
#include "sample_manager.hpp"
87
#include "uploader.hpp"
@@ -304,6 +303,20 @@ ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns) // cppchec
304303
sample->push_monotonic_ns(monotonic_ns);
305304
}
306305

306+
void
307+
ddup_increment_sampling_event_count() // cppcheck-suppress unusedFunction
308+
{
309+
auto borrowed = Datadog::Sample::profile_borrow();
310+
borrowed.stats().increment_sampling_event_count();
311+
}
312+
313+
void
314+
ddup_increment_sample_count() // cppcheck-suppress unusedFunction
315+
{
316+
auto borrowed = Datadog::Sample::profile_borrow();
317+
borrowed.stats().increment_sample_count();
318+
}
319+
307320
void
308321
ddup_flush_sample(Datadog::Sample* sample) // cppcheck-suppress unusedFunction
309322
{
@@ -351,17 +364,19 @@ ddup_upload() // cppcheck-suppress unusedFunction
351364
// be modified. It gets released and cleared after uploading.
352365
// * Uploading cancels inflight uploads. There are better ways to do this, but this is what
353366
// we have for now.
354-
uploader.upload(Datadog::Sample::profile_borrow());
355-
Datadog::Sample::profile_release();
356-
return true;
367+
auto borrowed = Datadog::Sample::profile_borrow();
368+
bool success = uploader.upload(borrowed.profile(), borrowed.stats());
369+
borrowed.stats().reset_state();
370+
return success;
357371
}
358372

359373
void
360374
ddup_profile_set_endpoints(
361375
std::unordered_map<int64_t, std::string_view> span_ids_to_endpoints) // cppcheck-suppress unusedFunction
362376
{
363377
static bool already_warned = false; // cppcheck-suppress threadsafety-threadsafety
364-
ddog_prof_Profile& profile = Datadog::Sample::profile_borrow();
378+
auto borrowed = Datadog::Sample::profile_borrow();
379+
ddog_prof_Profile& profile = borrowed.profile();
365380
for (const auto& [span_id, trace_endpoint] : span_ids_to_endpoints) {
366381
ddog_CharSlice trace_endpoint_slice = Datadog::to_slice(trace_endpoint);
367382
auto res = ddog_prof_Profile_set_endpoint(&profile, span_id, trace_endpoint_slice);
@@ -375,14 +390,14 @@ ddup_profile_set_endpoints(
375390
ddog_Error_drop(&err);
376391
}
377392
}
378-
Datadog::Sample::profile_release();
379393
}
380394

381395
void
382396
ddup_profile_add_endpoint_counts(std::unordered_map<std::string_view, int64_t> trace_endpoints_to_counts)
383397
{
384398
static bool already_warned = false; // cppcheck-suppress threadsafety-threadsafety
385-
ddog_prof_Profile& profile = Datadog::Sample::profile_borrow();
399+
auto borrowed = Datadog::Sample::profile_borrow();
400+
ddog_prof_Profile& profile = borrowed.profile();
386401
for (const auto& [trace_endpoint, count] : trace_endpoints_to_counts) {
387402
ddog_CharSlice trace_endpoint_slice = Datadog::to_slice(trace_endpoint);
388403
auto res = ddog_prof_Profile_add_endpoint_count(&profile, trace_endpoint_slice, count);
@@ -396,5 +411,4 @@ ddup_profile_add_endpoint_counts(std::unordered_map<std::string_view, int64_t> t
396411
ddog_Error_drop(&err);
397412
}
398413
}
399-
Datadog::Sample::profile_release();
400414
}

ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "profile.hpp"
22

33
#include "libdatadog_helpers.hpp"
4+
#include "profile_borrow.hpp"
5+
#include "profiler_stats.hpp"
46

57
#include <functional>
68
#include <iostream>
@@ -55,6 +57,8 @@ Datadog::Profile::reset_profile()
5557
ddog_Error_drop(&err);
5658
return false;
5759
}
60+
61+
profiler_stats.reset_state();
5862
return true;
5963
}
6064

@@ -127,11 +131,16 @@ Datadog::Profile::get_sample_type_length()
127131
return samplers.size();
128132
}
129133

134+
Datadog::ProfileBorrow
135+
Datadog::Profile::borrow()
136+
{
137+
return ProfileBorrow(*this);
138+
}
139+
130140
ddog_prof_Profile&
131-
Datadog::Profile::profile_borrow()
141+
Datadog::Profile::profile_borrow_internal()
132142
{
133-
// We could wrap this in an object for better RAII, but since this
134-
// sequence is only used in a single place, we'll hold off on that sidequest.
143+
// Note: Caller is responsible for ensuring profile_release() is called
135144
profile_mtx.lock();
136145
return cur_profile;
137146
}
@@ -219,5 +228,6 @@ Datadog::Profile::postfork_child()
219228
{
220229
new (&profile_mtx) std::mutex();
221230
// Reset the profile to clear any samples collected in the parent process
231+
profiler_stats.reset_state();
222232
reset_profile();
223233
}

0 commit comments

Comments
 (0)