Skip to content

Commit cb1463f

Browse files
Copilot and bbockelm authored
Add monitoring API for per-issuer validation statistics (#182)
* Add monitoring API infrastructure * Add periodic monitoring file output with thread-safe configuration --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Brian P Bockelman <bockelman@gmail.com>
1 parent 1e3553c commit cb1463f

File tree

11 files changed

+2102
-128
lines changed

11 files changed

+2102
-128
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ pkg_check_modules(SQLITE REQUIRED sqlite3)
4444

4545
endif()
4646

47-
add_library(SciTokens SHARED src/scitokens.cpp src/scitokens_internal.cpp src/scitokens_cache.cpp)
47+
add_library(SciTokens SHARED src/scitokens.cpp src/scitokens_internal.cpp src/scitokens_cache.cpp src/scitokens_monitoring.cpp)
4848
target_compile_features(SciTokens PUBLIC cxx_std_11) # Use at least C++11 for building and when linking to scitokens
4949
target_include_directories(SciTokens PUBLIC ${JWT_CPP_INCLUDES} "${PROJECT_SOURCE_DIR}/src" PRIVATE ${CURL_INCLUDE_DIRS} ${OPENSSL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS} ${SQLITE_INCLUDE_DIRS} ${UUID_INCLUDE_DIRS})
5050

@@ -75,6 +75,7 @@ target_link_libraries(scitokens-list-access SciTokens)
7575
add_executable(scitokens-create src/create.cpp)
7676
target_link_libraries(scitokens-create SciTokens)
7777

78+
7879
add_executable(scitokens-generate-jwks src/generate_jwks.cpp)
7980
target_include_directories(scitokens-generate-jwks PRIVATE ${OPENSSL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS})
8081
target_link_libraries(scitokens-generate-jwks ${OPENSSL_LIBRARIES} ${LIBCRYPTO_LIBRARIES})

src/scitokens.cpp

Lines changed: 89 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,35 @@ std::shared_ptr<std::string> configurer::Configuration::m_cache_home =
2020
std::shared_ptr<std::string> configurer::Configuration::m_tls_ca_file =
2121
std::make_shared<std::string>("");
2222

23+
// Monitoring file config (empty string means disabled)
24+
// Protected by mutex; atomic flag for fast-path check
25+
std::string configurer::Configuration::m_monitoring_file;
26+
std::mutex configurer::Configuration::m_monitoring_file_mutex;
27+
std::atomic<bool> configurer::Configuration::m_monitoring_file_configured{
28+
false};
29+
std::atomic_int configurer::Configuration::m_monitoring_file_interval{60};
30+
31+
void configurer::Configuration::set_monitoring_file(const std::string &path) {
32+
std::lock_guard<std::mutex> lock(m_monitoring_file_mutex);
33+
m_monitoring_file = path;
34+
// Update the atomic flag after setting the string
35+
m_monitoring_file_configured.store(!path.empty(),
36+
std::memory_order_release);
37+
}
38+
39+
std::string configurer::Configuration::get_monitoring_file() {
40+
std::lock_guard<std::mutex> lock(m_monitoring_file_mutex);
41+
return m_monitoring_file;
42+
}
43+
44+
void configurer::Configuration::set_monitoring_file_interval(int seconds) {
45+
m_monitoring_file_interval = seconds;
46+
}
47+
48+
int configurer::Configuration::get_monitoring_file_interval() {
49+
return m_monitoring_file_interval;
50+
}
51+
2352
SciTokenKey scitoken_key_create(const char *key_id, const char *alg,
2453
const char *public_contents,
2554
const char *private_contents, char **err_msg) {
@@ -246,10 +275,12 @@ int scitoken_get_expiration(const SciToken token, long long *expiry,
246275
// Float value - convert to integer (truncate)
247276
// Float value - convert to integer using std::floor().
248277
// This ensures expiration is not extended by fractional seconds.
249-
result = static_cast<long long>(std::floor(claim_value.get<double>()));
278+
result =
279+
static_cast<long long>(std::floor(claim_value.get<double>()));
250280
} else {
251281
if (err_msg) {
252-
*err_msg = strdup("'exp' claim must be a number (integer or float)");
282+
*err_msg =
283+
strdup("'exp' claim must be a number (integer or float)");
253284
}
254285
return -1;
255286
}
@@ -1024,6 +1055,17 @@ int scitoken_config_set_int(const char *key, int value, char **err_msg) {
10241055
return 0;
10251056
}
10261057

1058+
else if (_key == "monitoring.file_interval_s") {
1059+
if (value < 0) {
1060+
if (err_msg) {
1061+
*err_msg = strdup("Interval cannot be negative.");
1062+
}
1063+
return -1;
1064+
}
1065+
configurer::Configuration::set_monitoring_file_interval(value);
1066+
return 0;
1067+
}
1068+
10271069
else {
10281070
if (err_msg) {
10291071
*err_msg = strdup("Key not recognized.");
@@ -1053,6 +1095,10 @@ int scitoken_config_get_int(const char *key, char **err_msg) {
10531095
return configurer::Configuration::get_expiry_delta();
10541096
}
10551097

1098+
else if (_key == "monitoring.file_interval_s") {
1099+
return configurer::Configuration::get_monitoring_file_interval();
1100+
}
1101+
10561102
else {
10571103
if (err_msg) {
10581104
*err_msg = strdup("Key not recognized.");
@@ -1080,9 +1126,12 @@ int scitoken_config_set_str(const char *key, const char *value,
10801126
return -1;
10811127
}
10821128
} else if (_key == "tls.ca_file") {
1083-
configurer::Configuration::set_tls_ca_file(value ? std::string(value) : "");
1084-
}
1085-
else {
1129+
configurer::Configuration::set_tls_ca_file(value ? std::string(value)
1130+
: "");
1131+
} else if (_key == "monitoring.file") {
1132+
configurer::Configuration::set_monitoring_file(
1133+
value ? std::string(value) : "");
1134+
} else {
10861135
if (err_msg) {
10871136
*err_msg = strdup("Key not recognized.");
10881137
}
@@ -1104,6 +1153,9 @@ int scitoken_config_get_str(const char *key, char **output, char **err_msg) {
11041153
*output = strdup(configurer::Configuration::get_cache_home().c_str());
11051154
} else if (_key == "tls.ca_file") {
11061155
*output = strdup(configurer::Configuration::get_tls_ca_file().c_str());
1156+
} else if (_key == "monitoring.file") {
1157+
*output =
1158+
strdup(configurer::Configuration::get_monitoring_file().c_str());
11071159
}
11081160

11091161
else {
@@ -1114,3 +1166,35 @@ int scitoken_config_get_str(const char *key, char **output, char **err_msg) {
11141166
}
11151167
return 0;
11161168
}
1169+
1170+
int scitoken_get_monitoring_json(char **json_out, char **err_msg) {
1171+
if (!json_out) {
1172+
if (err_msg) {
1173+
*err_msg = strdup("JSON output pointer may not be null.");
1174+
}
1175+
return -1;
1176+
}
1177+
try {
1178+
std::string json =
1179+
scitokens::internal::MonitoringStats::instance().get_json();
1180+
*json_out = strdup(json.c_str());
1181+
} catch (std::exception &exc) {
1182+
if (err_msg) {
1183+
*err_msg = strdup(exc.what());
1184+
}
1185+
return -1;
1186+
}
1187+
return 0;
1188+
}
1189+
1190+
int scitoken_reset_monitoring_stats(char **err_msg) {
1191+
try {
1192+
scitokens::internal::MonitoringStats::instance().reset();
1193+
} catch (std::exception &exc) {
1194+
if (err_msg) {
1195+
*err_msg = strdup(exc.what());
1196+
}
1197+
return -1;
1198+
}
1199+
return 0;
1200+
}

src/scitokens.h

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -302,6 +302,12 @@ int config_set_int(const char *key, int value, char **err_msg);
302302
* Takes in key/value pairs and assigns the input value to whatever
303303
* configuration variable is indicated by the key.
304304
* Returns 0 on success, and non-zero for invalid keys or values.
305+
*
306+
* Supported keys:
307+
* - "keycache.update_interval_s": Interval between key cache updates (seconds)
308+
* - "keycache.expiration_interval_s": Key cache expiration time (seconds)
309+
* - "monitoring.file_interval_s": Interval between monitoring file writes
310+
* (seconds, default 60)
305311
*/
306312
int scitoken_config_set_int(const char *key, int value, char **err_msg);
307313

@@ -313,22 +319,77 @@ int config_get_int(const char *key, char **err_msg);
313319
* Returns the value associated with the supplied input key on success, and -1
314320
* on failure. This assumes there are no keys for which a negative return value
315321
* is permissible.
322+
*
323+
* Supported keys:
324+
* - "keycache.update_interval_s": Interval between key cache updates (seconds)
325+
* - "keycache.expiration_interval_s": Key cache expiration time (seconds)
326+
* - "monitoring.file_interval_s": Interval between monitoring file writes
327+
* (seconds, default 60)
316328
*/
317329
int scitoken_config_get_int(const char *key, char **err_msg);
318330

319331
/**
320332
* Set current scitokens str parameters.
321333
* Returns 0 on success, nonzero on failure
334+
*
335+
* Supported keys:
336+
* - "keycache.cache_home": Directory for the key cache
337+
* - "tls.ca_file": Path to TLS CA certificate file
338+
* - "monitoring.file": Path to write monitoring JSON (empty to disable, default
339+
* disabled). When enabled, monitoring stats are written periodically during
340+
* verify() calls. The write interval is controlled by
341+
* "monitoring.file_interval_s".
322342
*/
323343
int scitoken_config_set_str(const char *key, const char *value, char **err_msg);
324344

325345
/**
326346
* Get current scitokens str parameters.
327347
* Returns 0 on success, nonzero on failure, and populates the value associated
328348
* with the input key to output.
349+
*
350+
* Supported keys:
351+
* - "keycache.cache_home": Directory for the key cache
352+
* - "tls.ca_file": Path to TLS CA certificate file
353+
* - "monitoring.file": Path to write monitoring JSON (empty if disabled)
329354
*/
330355
int scitoken_config_get_str(const char *key, char **output, char **err_msg);
331356

357+
/**
358+
* Get monitoring statistics as a JSON string.
359+
* Returns a JSON object containing per-issuer validation statistics.
360+
*
361+
* Per-issuer statistics (under "issuers" key):
362+
* - successful_validations: count of successful token validations
363+
* - unsuccessful_validations: count of failed token validations
364+
* - expired_tokens: count of expired tokens encountered
365+
* - sync_validations_started: count of validations started via blocking API
366+
* - async_validations_started: count of validations started via async API
367+
* - sync_total_time_s: time spent in blocking verify() calls (updated every
368+
* 50ms)
369+
* - async_total_time_s: time spent in async validations (updated on completion)
370+
* - total_validation_time_s: sum of sync and async time
371+
* - successful_key_lookups: count of successful JWKS web refreshes
372+
* - failed_key_lookups: count of failed JWKS web refreshes
373+
* - failed_key_lookup_time_s: total time spent on failed key lookups
374+
* - expired_keys: count of times keys expired before refresh completed
375+
* - failed_refreshes: count of failed key refresh attempts (used cached keys)
376+
* - stale_key_uses: count of times keys were used past their next_update time
377+
*
378+
* Failed issuer lookups (under "failed_issuer_lookups" key):
379+
* - Per unknown issuer: count and total_time_s of failed lookup attempts
380+
* - Limited to 100 entries to prevent resource exhaustion from DDoS attacks
381+
*
382+
* The returned string must be freed by the caller using free().
383+
* Returns 0 on success, nonzero on failure.
384+
*/
385+
int scitoken_get_monitoring_json(char **json_out, char **err_msg);
386+
387+
/**
388+
* Reset all monitoring statistics.
389+
* Returns 0 on success, nonzero on failure.
390+
*/
391+
int scitoken_reset_monitoring_stats(char **err_msg);
392+
332393
#ifdef __cplusplus
333394
}
334395
#endif

0 commit comments

Comments
 (0)