diff --git a/README.md b/README.md index 494e343749..536dbbb923 100644 --- a/README.md +++ b/README.md @@ -328,30 +328,31 @@ Check out the [Clustering Guide](https://github.com/redis/node-redis/blob/master ### OpenTelemetry -#### OpenTelemetry Metrics Instrumentation - ```typescript import { createClient, OpenTelemetry } from "redis"; OpenTelemetry.init({ metrics: { - enabled: true - } + enabled: true, + }, + tracing: { + enabled: true, + }, }); -const client = createClient() +const client = createClient(); await client.connect(); // ... use the client as usual ``` -**Important:** Initializing `OpenTelemetry` only enables node-redis metrics instrumentation and requires both `@opentelemetry/api` and an OpenTelemetry SDK configured in your application. +**Important:** Initializing `OpenTelemetry` only enables node-redis instrumentation and requires both `@opentelemetry/api` and an OpenTelemetry SDK configured in your application. **Important:** Initialize `OpenTelemetry` before creating Redis clients. For SDK/provider/exporter setup, verification, and advanced configuration, see: -- [OpenTelemetry Metrics docs](./docs/otel-metrics.md) -- [OpenTelemetry Metrics example](./examples/otel-metrics.js) +- [OpenTelemetry Metrics docs](./docs/otel-metrics.md) · [example](./examples/otel-metrics.js) +- [OpenTelemetry Tracing docs](./docs/otel-tracing.md) · [example](./examples/otel-tracing.js) ### Diagnostics Channel diff --git a/docs/otel-tracing.md b/docs/otel-tracing.md new file mode 100644 index 0000000000..bfcdfae6b8 --- /dev/null +++ b/docs/otel-tracing.md @@ -0,0 +1,218 @@ +# OpenTelemetry Tracing + +## Get started + +### Step 1. Install node-redis dependencies + +```bash +npm install redis @opentelemetry/api +``` + +`@opentelemetry/api` is required at runtime for `OpenTelemetry.init(...)`. + +### Step 2. Install OpenTelemetry SDK packages + +```bash +npm install @opentelemetry/sdk-trace-base @opentelemetry/context-async-hooks +``` + +Alternative (Node SDK): + +```bash +npm install @opentelemetry/sdk-node +``` + +If you export to OTLP or another backend, install the matching OpenTelemetry exporter package (e.g. `@opentelemetry/exporter-trace-otlp-http`). + +For more information, see the [OpenTelemetry Tracing documentation](https://opentelemetry.io/docs/instrumentation/js/exporters/#traces). + +### Step 3. Register OpenTelemetry + +Option A: Use `@opentelemetry/sdk-trace-base` directly + +```typescript +import { trace, context } from "@opentelemetry/api"; +import { + BasicTracerProvider, + SimpleSpanProcessor, + ConsoleSpanExporter, +} from "@opentelemetry/sdk-trace-base"; +import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks"; + +const provider = new BasicTracerProvider({ + spanProcessors: [ + new SimpleSpanProcessor(new ConsoleSpanExporter()), + ], +}); + +context.setGlobalContextManager(new AsyncLocalStorageContextManager()); +trace.setGlobalTracerProvider(provider); +``` + +Option B: Use `@opentelemetry/sdk-node` + +```typescript +import { NodeSDK } from "@opentelemetry/sdk-node"; +import { ConsoleSpanExporter } from "@opentelemetry/sdk-trace-base"; + +const sdk = new NodeSDK({ + traceExporter: new ConsoleSpanExporter(), +}); + +await sdk.start(); +``` + +### Step 4. Initialize node-redis instrumentation before creating clients + +```typescript +import { createClient, OpenTelemetry } from "redis"; + +OpenTelemetry.init({ + tracing: { + enabled: true, + }, +}); + +const client = createClient(); +await client.connect(); +``` + +## Examples + +### Minimal Example + +```typescript +import { OpenTelemetry } from "redis"; + +OpenTelemetry.init({ + tracing: { + enabled: true, + }, +}); +``` + +### Full Example + +```typescript +import { OpenTelemetry } from "redis"; + +OpenTelemetry.init({ + tracing: { + enabled: true, + tracerProvider: customTracerProvider, + includeCommands: ["GET", "SET", "HSET"], + excludeCommands: ["PING"], + enableConnectionSpans: true, + }, +}); +``` + +### Combined with Metrics + +```typescript +import { OpenTelemetry } from "redis"; + +OpenTelemetry.init({ + metrics: { + enabled: true, + }, + tracing: { + enabled: true, + enableConnectionSpans: true, + }, +}); +``` + +## Configuration + +### TracingConfig + +| Property | Default | Description | +| -------- | ------- | ----------- | +| enabled | **false** | Enables span creation. | +| tracerProvider | | Uses this provider instead of the global provider from @opentelemetry/api. | +| includeCommands | **[]** | Case-insensitive allow-list for command spans. | +| excludeCommands | **[]** | Case-insensitive deny-list for command spans. If both include and exclude match, exclude wins. | +| enableConnectionSpans | **false** | Creates spans for `connect()` calls. | + +## Span types + +### Command spans + +Created for each Redis command (`GET`, `SET`, `HSET`, etc.). + +| Attribute | Value | +| --------- | ----- | +| `db.system.name` | `redis` | +| `db.operation.name` | The command name (e.g. `SET`) | +| `db.namespace` | The selected database index | +| `db.query.text` | The sanitized command and arguments | +| `server.address` | Redis server host | +| `server.port` | Redis server port | +| `redis.client.library` | `node-redis:` | + +### Batch spans (MULTI / PIPELINE) + +Created for `MULTI`/`EXEC` transactions and pipelines. Individual commands within the batch are traced as child spans. + +| Attribute | Value | +| --------- | ----- | +| `db.system.name` | `redis` | +| `db.operation.name` | `MULTI` or `PIPELINE` | +| `db.namespace` | The selected database index | +| `db.operation.batch.size` | Number of commands in the batch | +| `server.address` | Redis server host | +| `server.port` | Redis server port | +| `redis.client.library` | `node-redis:` | + +### Connection spans + +Created for `connect()` calls when `enableConnectionSpans` is `true`. + +| Attribute | Value | +| --------- | ----- | +| `db.system.name` | `redis` | +| `server.address` | Redis server host | +| `server.port` | Redis server port | +| `redis.client.library` | `node-redis:` | + +## Error handling + +When a command fails, the span records the exception and sets the following: + +| Attribute | Value | +| --------- | ----- | +| `error.type` | The error class name | +| `db.response.status_code` | The Redis error prefix (e.g. `WRONGTYPE`) if available | + +The span status is set to `ERROR` with the error message. + +## Context propagation + +Spans created by node-redis automatically participate in the active OpenTelemetry context. If you create an application-level span and execute Redis commands within it, the Redis spans appear as children: + +```typescript +const tracer = trace.getTracer("my-app"); + +await tracer.startActiveSpan("handleRequest", async (span) => { + // These Redis spans are children of "handleRequest" + await client.get("session:abc"); + await client.set("last-seen", Date.now().toString()); + span.end(); +}); +``` + +This also works with nested batch operations — individual command spans appear as children of their `MULTI` or `PIPELINE` parent span. + +Context propagation requires `@opentelemetry/context-async-hooks` (or the Node SDK) to be registered. + +## Notes + +- `OpenTelemetry` is a singleton and a second `init` call throws. +- If `@opentelemetry/api` is not installed, `init` throws. +- Command arguments in `db.query.text` are sanitized — values for write commands are replaced with `?` to avoid leaking sensitive data. +- Tracing is built on top of the [Diagnostics Channel](./diagnostics-channel.md) `TracingChannel` infrastructure. Requires Node.js >= 18.19.0. + +## Runnable example + +See ../examples/otel-tracing.js. diff --git a/examples/README.md b/examples/README.md index 5e82b3a82b..75c223b30d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -19,6 +19,7 @@ This folder contains example scripts showing how to use Node Redis in different | `lua-multi-incr.js` | Define a custom lua script that allows you to perform INCRBY on multiple keys. | | `managing-json.js` | Store, retrieve and manipulate JSON data atomically with [RedisJSON](https://redisjson.io/). | | `otel-metrics.js` | Enable OpenTelemetry metrics for node-redis, generate command and resiliency signals, and export them via OpenTelemetry SDK metrics. | +| `otel-tracing.js` | Enable OpenTelemetry tracing for node-redis, demonstrating command spans, MULTI transactions, nested app spans, and error recording. | | `pubsub-publisher.js` | Adds multiple messages on 2 different channels messages to Redis. | | `pubsub-subscriber.js` | Reads messages from channels using `PSUBSCRIBE` command. | | `search-hashes.js` | Uses [RediSearch](https://redisearch.io) to index and search data in hashes. | diff --git a/examples/otel-tracing.js b/examples/otel-tracing.js new file mode 100644 index 0000000000..79df841f7d --- /dev/null +++ b/examples/otel-tracing.js @@ -0,0 +1,73 @@ +// OpenTelemetry tracing example for node-redis. +// Demonstrates span creation for commands, MULTI transactions, and nested app spans. + +import { createClient, OpenTelemetry } from 'redis'; +import { trace, context } from '@opentelemetry/api'; +import { + BasicTracerProvider, + SimpleSpanProcessor, + ConsoleSpanExporter, +} from '@opentelemetry/sdk-trace-base'; +import { AsyncLocalStorageContextManager } from '@opentelemetry/context-async-hooks'; + +// Export spans to console for easy local verification. +const provider = new BasicTracerProvider({ + spanProcessors: [ + new SimpleSpanProcessor(new ConsoleSpanExporter()), + ], +}); + +context.setGlobalContextManager(new AsyncLocalStorageContextManager()); +trace.setGlobalTracerProvider(provider); + +// Enable OpenTelemetry before creating clients. +OpenTelemetry.init({ + tracing: { + enabled: true, + enableConnectionSpans: true, + }, +}); + +const client = createClient(); + +client.on('error', (err) => { + console.error('Redis client error:', err); +}); + +const tracer = trace.getTracer('example-app'); + +try { + await client.connect(); + + // Normal command traffic — each creates a span. + await client.set('otel:example:key', 'hello'); + await client.get('otel:example:key'); + + // MULTI transaction — creates a parent batch span with child command spans. + await client.multi() + .set('otel:example:a', '1') + .set('otel:example:b', '2') + .get('otel:example:a') + .exec(); + + // Application span wrapping Redis commands — demonstrates parent-child propagation. + await tracer.startActiveSpan('myApp.processRequest', async (span) => { + await client.get('otel:example:key'); + await client.set('otel:example:visited', 'true'); + span.end(); + }); + + // Generate a handled error to demonstrate error span recording. + await client.hSet('otel:example:hash', 'field', 'value'); + try { + await client.incr('otel:example:hash'); + } catch (err) { + console.log('Expected command error:', err); + } + + // Force export so output is visible immediately. + await provider.forceFlush(); +} finally { + client.destroy(); + await provider.shutdown(); +} diff --git a/package-lock.json b/package-lock.json index 3a9203e9a3..491eab7425 100644 --- a/package-lock.json +++ b/package-lock.json @@ -345,6 +345,7 @@ "version": "7.23.9", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.23.5", @@ -1561,6 +1562,7 @@ "integrity": "sha512-vvmsN0r7rguA+FySiCsbaTTobSftpIDIpPW81trAmsv9TGxg3YCujAxRYp/Uy8xmDgYCzzgulG62H7KYUFmeIg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^5.0.0", "@octokit/graphql": "^8.2.2", @@ -1751,10 +1753,24 @@ "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "dev": true, "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } }, + "node_modules/@opentelemetry/context-async-hooks": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/context-async-hooks/-/context-async-hooks-2.6.1.tgz", + "integrity": "sha512-XHzhwRNkBpeP8Fs/qjGrAf9r9PRv67wkJQ/7ZPaBQQ68DYlTBBx5MF9LvPx7mhuXcDessKK2b+DcxqwpgkcivQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, "node_modules/@opentelemetry/core": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.2.0.tgz", @@ -1805,6 +1821,57 @@ "@opentelemetry/api": ">=1.9.0 <1.10.0" } }, + "node_modules/@opentelemetry/sdk-trace-base": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.6.1.tgz", + "integrity": "sha512-r86ut4T1e8vNwB35CqCcKd45yzqH6/6Wzvpk2/cZB8PsPLlZFTvrh8yfOS3CYZYcUmAx4hHTZJ8AO8Dj8nrdhw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.6.1", + "@opentelemetry/resources": "2.6.1", + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sdk-trace-base/node_modules/@opentelemetry/core": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.6.1.tgz", + "integrity": "sha512-8xHSGWpJP9wBxgBpnqGL0R3PbdWQndL1Qp50qrg71+B28zK5OQmUgcDKLJgzyAAV38t4tOyLMGDD60LneR5W8g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sdk-trace-base/node_modules/@opentelemetry/resources": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.6.1.tgz", + "integrity": "sha512-lID/vxSuKWXM55XhAKNoYXu9Cutoq5hFdkbTdI/zDKQktXzcWBVhNsOkiZFTMU9UtEWuGRNe0HUgmsFldIdxVA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.6.1", + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, "node_modules/@opentelemetry/semantic-conventions": { "version": "1.37.0", "resolved": "https://registry.npmjs.org/@opentelemetry/semantic-conventions/-/semantic-conventions-1.37.0.tgz", @@ -2058,6 +2125,7 @@ "version": "20.11.16", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~5.26.4" } @@ -2475,6 +2543,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001580", "electron-to-chromium": "^1.4.648", @@ -5266,6 +5335,7 @@ "version": "10.2.0", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "ansi-colors": "4.1.1", "browser-stdout": "1.3.1", @@ -5547,6 +5617,7 @@ "version": "15.1.0", "dev": true, "license": "ISC", + "peer": true, "dependencies": { "@istanbuljs/load-nyc-config": "^1.0.0", "@istanbuljs/schema": "^0.1.2", @@ -6486,6 +6557,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "@nodeutils/defaults-deep": "1.1.0", "@octokit/rest": "21.1.1", @@ -7212,6 +7284,7 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -7442,6 +7515,7 @@ "version": "5.3.3", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -7831,13 +7905,16 @@ "name": "@redis/client", "version": "5.11.0", "license": "MIT", + "peer": true, "dependencies": { "cluster-key-slot": "1.1.2" }, "devDependencies": { "@node-rs/xxhash": "1.7.6", "@opentelemetry/api": "^1.9.0", + "@opentelemetry/context-async-hooks": "^2.6.1", "@opentelemetry/sdk-metrics": "^2.2.0", + "@opentelemetry/sdk-trace-base": "^2.6.1", "@redis/test-utils": "*", "@types/sinon": "^17.0.3", "sinon": "^17.0.1" diff --git a/packages/client/lib/opentelemetry/index.ts b/packages/client/lib/opentelemetry/index.ts index 1872a95a01..84c1f17eb9 100644 --- a/packages/client/lib/opentelemetry/index.ts +++ b/packages/client/lib/opentelemetry/index.ts @@ -1,6 +1,7 @@ import { OpenTelemetryError } from "../errors"; import { ClientRegistry } from "./client-registry"; import { OTelMetrics } from "./metrics"; +import { OTelTracing } from "./tracing"; import { ObservabilityConfig } from "./types"; export class OpenTelemetry { @@ -12,8 +13,8 @@ export class OpenTelemetry { /** * Initializes node-redis OpenTelemetry observability. * - * This bootstraps node-redis metrics instrumentation and registers the - * internal client registry used by metric collectors. + * This bootstraps node-redis metrics and/or tracing instrumentation and + * registers the internal client registry used by metric collectors. * * Call this once during application startup, before creating Redis clients * you want to observe. @@ -27,7 +28,7 @@ export class OpenTelemetry { * * @example * ```ts - * import { metrics } from "@opentelemetry/api"; + * import { metrics, trace } from "@opentelemetry/api"; * import { * ConsoleMetricExporter, * MeterProvider, @@ -48,6 +49,10 @@ export class OpenTelemetry { * enabledMetricGroups: ["pubsub", "connection-basic", "resiliency"], * includeCommands: ["GET", "SET"], * hidePubSubChannelNames: true + * }, + * tracing: { + * enabled: true, + * enableConnectionSpans: false, * } * }); * ``` @@ -68,6 +73,10 @@ export class OpenTelemetry { OpenTelemetry._instance = new OpenTelemetry(); ClientRegistry.init(); OTelMetrics.init({ api, config }); + + if (config?.tracing?.enabled) { + OTelTracing.init({ api, config: config.tracing }); + } } } @@ -77,8 +86,10 @@ export { CONNECTION_CLOSE_REASON, CSC_RESULT, CSC_EVICTION_REASON, + TracingConfig, } from "./types"; export { OTelMetrics } from "./metrics"; +export { OTelTracing } from "./tracing"; export { ClientRegistry, ClientMetricsHandle, diff --git a/packages/client/lib/opentelemetry/tracing.spec.ts b/packages/client/lib/opentelemetry/tracing.spec.ts new file mode 100644 index 0000000000..d5f97a36f0 --- /dev/null +++ b/packages/client/lib/opentelemetry/tracing.spec.ts @@ -0,0 +1,367 @@ +import { strict as assert } from "node:assert"; +import dc from "node:diagnostics_channel"; + +import * as api from "@opentelemetry/api"; +import { + BasicTracerProvider, + InMemorySpanExporter, + SimpleSpanProcessor, +} from "@opentelemetry/sdk-trace-base"; +import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks"; +import { OTelTracing } from "./tracing"; +import { OTEL_ATTRIBUTES } from "./types"; + +describe("OTelTracing", () => { + let exporter: InMemorySpanExporter; + let provider: BasicTracerProvider; + + let contextManager: AsyncLocalStorageContextManager; + + beforeEach(() => { + contextManager = new AsyncLocalStorageContextManager().enable(); + api.context.setGlobalContextManager(contextManager); + + exporter = new InMemorySpanExporter(); + provider = new BasicTracerProvider({ + spanProcessors: [new SimpleSpanProcessor(exporter)], + }); + api.trace.setGlobalTracerProvider(provider); + }); + + afterEach(async () => { + OTelTracing.reset(); + await provider.shutdown(); + api.trace.disable(); + api.context.disable(); + }); + + describe("command spans", () => { + it("should create a span for a command", async () => { + OTelTracing.init({ api, config: { enabled: true } }); + + const tc = dc.tracingChannel("node-redis:command"); + const ctx = { + command: "SET", + args: ["SET", "mykey", "?"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test-client", + }; + + await tc.tracePromise(async () => {}, ctx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 1); + assert.equal(spans[0].name, "SET"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.dbOperationName], "SET"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.dbNamespace], "0"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.serverAddress], "localhost"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.serverPort], 6379); + assert.equal(spans[0].attributes["db.query.text"], "SET mykey ?"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.dbSystemName], "redis"); + }); + + it("should record errors on command spans", async () => { + OTelTracing.init({ api, config: { enabled: true } }); + + const tc = dc.tracingChannel("node-redis:command"); + const ctx = { + command: "GET", + args: ["GET", "mykey"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test-client", + }; + + const error = new Error("READONLY You can't write against a read only replica"); + try { + await tc.tracePromise(async () => { throw error; }, ctx); + } catch {} + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 1); + assert.equal(spans[0].status.code, api.SpanStatusCode.ERROR); + assert.equal(spans[0].events.length, 1); + assert.equal(spans[0].events[0].name, "exception"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.errorType], "Error"); + }); + + it("should set db.response.status_code for Redis ErrorReply", async () => { + OTelTracing.init({ api, config: { enabled: true } }); + + const tc = dc.tracingChannel("node-redis:command"); + const ctx = { + command: "SET", + args: ["SET", "k", "?"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + + // Simulate an ErrorReply (has the class name "ErrorReply") + const { ErrorReply } = require("../errors"); + const error = new ErrorReply("WRONGTYPE Operation against a key holding the wrong kind of value"); + try { + await tc.tracePromise(async () => { throw error; }, ctx); + } catch {} + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 1); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.errorType], "ErrorReply"); + assert.equal(spans[0].attributes["db.response.status_code"], "WRONGTYPE"); + }); + + it("should exclude commands via excludeCommands", async () => { + OTelTracing.init({ + api, + config: { enabled: true, excludeCommands: ["PING"] }, + }); + + const tc = dc.tracingChannel("node-redis:command"); + + // Excluded command + const pingCtx = { + command: "PING", + args: ["PING"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await tc.tracePromise(async () => {}, pingCtx); + + // Included command + const setCtx = { + command: "SET", + args: ["SET", "k", "?"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await tc.tracePromise(async () => {}, setCtx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 1); + assert.equal(spans[0].name, "SET"); + }); + + it("should filter via includeCommands", async () => { + OTelTracing.init({ + api, + config: { enabled: true, includeCommands: ["GET"] }, + }); + + const tc = dc.tracingChannel("node-redis:command"); + + const setCtx = { + command: "SET", + args: ["SET", "k", "?"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await tc.tracePromise(async () => {}, setCtx); + + const getCtx = { + command: "GET", + args: ["GET", "k"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await tc.tracePromise(async () => {}, getCtx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 1); + assert.equal(spans[0].name, "GET"); + }); + }); + + describe("batch spans", () => { + it("should create a span for MULTI batch", async () => { + OTelTracing.init({ api, config: { enabled: true } }); + + const tc = dc.tracingChannel("node-redis:batch"); + const ctx = { + batchMode: "MULTI", + batchSize: 3, + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + + await tc.tracePromise(async () => {}, ctx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 1); + assert.equal(spans[0].name, "MULTI"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.dbOperationName], "MULTI"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.dbOperationBatchSize], 3); + }); + + it("should create parent-child relationship for batch > commands", async () => { + OTelTracing.init({ api, config: { enabled: true } }); + + const batchTC = dc.tracingChannel("node-redis:batch"); + const cmdTC = dc.tracingChannel("node-redis:command"); + + const batchCtx = { + batchMode: "MULTI", + batchSize: 2, + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + + await batchTC.tracePromise(async () => { + const cmd1Ctx = { + command: "SET", + args: ["SET", "k1", "?"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await cmdTC.tracePromise(async () => {}, cmd1Ctx); + + const cmd2Ctx = { + command: "SET", + args: ["SET", "k2", "?"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await cmdTC.tracePromise(async () => {}, cmd2Ctx); + }, batchCtx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + + assert.equal(spans.length, 3, "expected 3 spans (1 batch + 2 commands)"); + + const batchSpan = spans.find(s => s.name === "MULTI")!; + const cmdSpans = spans.filter(s => s.name === "SET"); + + assert.equal(cmdSpans.length, 2); + + // Command spans should be children of the batch span + for (const cmd of cmdSpans) { + assert.equal( + (cmd as any).parentSpanContext?.spanId, + batchSpan.spanContext().spanId, + "command span should be a child of the batch span", + ); + } + }); + }); + + describe("connection spans", () => { + it("should not create connection spans by default", async () => { + OTelTracing.init({ api, config: { enabled: true } }); + + const tc = dc.tracingChannel("node-redis:connect"); + const ctx = { + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + + await tc.tracePromise(async () => {}, ctx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + assert.equal(spans.length, 0, "connection spans should be off by default"); + }); + + it("should create connection spans when enabled", async () => { + OTelTracing.init({ + api, + config: { enabled: true, enableConnectionSpans: true }, + }); + + const tc = dc.tracingChannel("node-redis:connect"); + const ctx = { + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + + await tc.tracePromise(async () => {}, ctx); + + await provider.forceFlush(); + const spans = exporter.getFinishedSpans(); + assert.equal(spans.length, 1); + assert.equal(spans[0].name, "redis connect"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.serverAddress], "localhost"); + assert.equal(spans[0].attributes[OTEL_ATTRIBUTES.serverPort], 6379); + }); + + }); + + describe("lifecycle", () => { + it("should init only once", () => { + OTelTracing.init({ api, config: { enabled: true } }); + // Second init should be a no-op (not throw) + OTelTracing.init({ api, config: { enabled: true } }); + assert.ok(OTelTracing.isInitialized); + }); + + it("should reset cleanly", () => { + OTelTracing.init({ api, config: { enabled: true } }); + assert.ok(OTelTracing.isInitialized); + OTelTracing.reset(); + assert.ok(!OTelTracing.isInitialized); + }); + + it("should accept a custom tracerProvider", async () => { + const customExporter = new InMemorySpanExporter(); + const customProvider = new BasicTracerProvider({ + spanProcessors: [new SimpleSpanProcessor(customExporter)], + }); + + OTelTracing.init({ + api, + config: { enabled: true, tracerProvider: customProvider }, + }); + + const tc = dc.tracingChannel("node-redis:command"); + const ctx = { + command: "GET", + args: ["GET", "k"], + database: 0, + serverAddress: "localhost", + serverPort: 6379, + clientId: "test", + }; + await tc.tracePromise(async () => {}, ctx); + + await customProvider.forceFlush(); + const spans = customExporter.getFinishedSpans(); + assert.equal(spans.length, 1); + assert.equal(spans[0].name, "GET"); + + await customProvider.shutdown(); + }); + }); +}); diff --git a/packages/client/lib/opentelemetry/tracing.ts b/packages/client/lib/opentelemetry/tracing.ts new file mode 100644 index 0000000000..5bc032fc04 --- /dev/null +++ b/packages/client/lib/opentelemetry/tracing.ts @@ -0,0 +1,311 @@ +import type * as DC from 'node:diagnostics_channel'; +import type { + Span, + SpanOptions, + Tracer, +} from '@opentelemetry/api'; +import { + CHANNELS, + getTracingChannel, +} from '../client/tracing'; +import { + DEFAULT_OTEL_ATTRIBUTES, + INSTRUMENTATION_SCOPE_NAME, + OTEL_ATTRIBUTES, + TracingConfig, +} from './types'; +import { getErrorInfo } from './utils'; + +interface TracingOptions { + includeCommands: Record; + excludeCommands: Record; + hasIncludeCommands: boolean; + hasExcludeCommands: boolean; + enableConnectionSpans: boolean; +} + +// Symbol used to attach the span to the TracingChannel context object. +// Using a symbol avoids collisions with channel context properties. +const SPAN_KEY = Symbol('otel.span'); + +interface WithSpan { + [SPAN_KEY]?: Span; +} + +/** + * OTel span creation via TracingChannel subscribers. + */ +export class OTelTracing { + static #instance: OTelTracing | undefined; + static #initialized = false; + + readonly #tracer: Tracer; + readonly #options: TracingOptions; + readonly #unsubscribers: Array<() => void> = []; + readonly #api: typeof import('@opentelemetry/api'); + readonly #otelStorage: any; // OTel's internal AsyncLocalStorage (if accessible) + + private constructor( + api: typeof import('@opentelemetry/api'), + config?: TracingConfig, + ) { + this.#api = api; + this.#tracer = config?.tracerProvider + ? config.tracerProvider.getTracer(INSTRUMENTATION_SCOPE_NAME) + : api.trace.getTracer(INSTRUMENTATION_SCOPE_NAME); + + // Context propagation uses `channel.start.bindStore()` on OTel's internal + // AsyncLocalStorage so that spans created in the `start` handler become + // the active span for the duration of the traced operation. This is required + // because OTel's context manager and Node.js TracingChannel do not integrate + // automatically (yet). All APM consumers must do the same. + // See: https://github.com/open-telemetry/opentelemetry-js/issues/4986 + const contextManager = (api.context as any)._getContextManager?.(); + this.#otelStorage = contextManager?._asyncLocalStorage; + + this.#options = { + includeCommands: (config?.includeCommands ?? []).reduce>((acc, c) => { + acc[c.toUpperCase()] = true; + return acc; + }, {}), + hasIncludeCommands: !!config?.includeCommands?.length, + excludeCommands: (config?.excludeCommands ?? []).reduce>((acc, c) => { + acc[c.toUpperCase()] = true; + return acc; + }, {}), + hasExcludeCommands: !!config?.excludeCommands?.length, + enableConnectionSpans: config?.enableConnectionSpans ?? false, + }; + + this.#subscribeCommands(); + this.#subscribeBatch(); + + if (this.#options.enableConnectionSpans) { + this.#subscribeConnect(); + } + } + + static init({ + api, + config, + }: { + api: typeof import('@opentelemetry/api'); + config?: TracingConfig; + }) { + if (OTelTracing.#initialized) return; + OTelTracing.#instance = new OTelTracing(api, config); + OTelTracing.#initialized = true; + } + + static reset() { + OTelTracing.#instance?.destroy(); + OTelTracing.#instance = undefined; + OTelTracing.#initialized = false; + } + + static get isInitialized() { + return OTelTracing.#initialized; + } + + destroy() { + this.#unsubscribers.forEach(fn => fn()); + } + + // --------------------------------------------------------------------------- + // Core tracing helper + // --------------------------------------------------------------------------- + + /** + * Subscribes to a TracingChannel with span lifecycle management. + * + * When OTel's ALS is accessible, uses `bindStore` so the span is propagated + * as the active context for the duration of the traced operation (enabling + * automatic parent-child relationships for nested channels like batch > command). + * + * When ALS is not accessible, falls back to a WeakMap-based approach where + * spans are still created and ended, but won't act as parents for nested spans. + * + * @param onStart - called with the channel context, returns span name + options + * or `undefined` to skip (e.g. excluded commands) + */ + #traceChannel( + tc: DC.TracingChannel, + onStart: (ctx: any) => { name: string; options: SpanOptions } | undefined, + ) { + const { trace, context } = this.#api; + + if (this.#otelStorage) { + // Bind OTel's ALS to the start channel for context propagation + (tc as any).start.bindStore(this.#otelStorage, (data: WithSpan & any) => { + const desc = onStart(data); + if (!desc) return context.active(); + + const parentCtx = context.active(); + const span = this.#tracer.startSpan(desc.name, desc.options, parentCtx); + data[SPAN_KEY] = span; + return trace.setSpan(parentCtx, span); + }); + + const handlers = { + asyncEnd: (ctx: WithSpan) => { + ctx[SPAN_KEY]?.end(); + }, + error: (ctx: WithSpan & { error?: Error }) => { + const span = ctx[SPAN_KEY]; + if (span) { + this.#endSpanWithError(span, ctx.error!); + } + }, + } as DC.TracingChannelSubscribers; + + tc.subscribe(handlers); + this.#unsubscribers.push(() => { + tc.unsubscribe(handlers); + (tc as any).start.unbindStore(this.#otelStorage); + }); + } else { + // Fallback: WeakMap-based span tracking (no context propagation) + const spans = new WeakMap(); + + const handlers = { + start: (ctx: any) => { + const desc = onStart(ctx); + if (!desc) return; + spans.set(ctx, this.#tracer.startSpan(desc.name, desc.options)); + }, + asyncEnd: (ctx: any) => { + const span = spans.get(ctx); + if (span) { + span.end(); + spans.delete(ctx); + } + }, + error: (ctx: any) => { + const span = spans.get(ctx); + if (span) { + this.#endSpanWithError(span, ctx.error); + spans.delete(ctx); + } + }, + } as DC.TracingChannelSubscribers; + + tc.subscribe(handlers); + this.#unsubscribers.push(() => tc.unsubscribe(handlers)); + } + } + + // --------------------------------------------------------------------------- + // Command spans + // --------------------------------------------------------------------------- + + #subscribeCommands() { + const tc = getTracingChannel(CHANNELS.TRACE_COMMAND); + if (!tc) return; + + const { SpanKind } = this.#api; + + this.#traceChannel(tc, (ctx) => { + const commandName = ctx.command?.toString() ?? 'UNKNOWN'; + if (this.#isCommandExcluded(commandName)) return undefined; + + return { + name: commandName, + options: { + kind: SpanKind.CLIENT, + attributes: { + ...DEFAULT_OTEL_ATTRIBUTES, + [OTEL_ATTRIBUTES.dbOperationName]: commandName, + [OTEL_ATTRIBUTES.dbNamespace]: String(ctx.database), + [OTEL_ATTRIBUTES.serverAddress]: ctx.serverAddress, + ...(ctx.serverPort !== undefined && { + [OTEL_ATTRIBUTES.serverPort]: ctx.serverPort, + }), + 'db.query.text': (ctx.args as ReadonlyArray).join(' '), + }, + }, + }; + }); + } + + // --------------------------------------------------------------------------- + // Batch spans (MULTI / PIPELINE) + // --------------------------------------------------------------------------- + + #subscribeBatch() { + const tc = getTracingChannel(CHANNELS.TRACE_BATCH); + if (!tc) return; + + const { SpanKind } = this.#api; + + this.#traceChannel(tc, (ctx) => { + const batchMode = ctx.batchMode ?? 'PIPELINE'; + + return { + name: batchMode, + options: { + kind: SpanKind.CLIENT, + attributes: { + ...DEFAULT_OTEL_ATTRIBUTES, + [OTEL_ATTRIBUTES.dbOperationName]: batchMode, + [OTEL_ATTRIBUTES.dbNamespace]: String(ctx.database), + [OTEL_ATTRIBUTES.serverAddress]: ctx.serverAddress, + ...(ctx.serverPort !== undefined && { + [OTEL_ATTRIBUTES.serverPort]: ctx.serverPort, + }), + [OTEL_ATTRIBUTES.dbOperationBatchSize]: ctx.batchSize, + }, + }, + }; + }); + } + + // --------------------------------------------------------------------------- + // Connection spans + // --------------------------------------------------------------------------- + + #subscribeConnect() { + const tc = getTracingChannel(CHANNELS.TRACE_CONNECT); + if (!tc) return; + + const { SpanKind } = this.#api; + + this.#traceChannel(tc, (ctx) => ({ + name: 'redis connect', + options: { + kind: SpanKind.CLIENT, + attributes: { + ...DEFAULT_OTEL_ATTRIBUTES, + [OTEL_ATTRIBUTES.serverAddress]: ctx.serverAddress, + ...(ctx.serverPort !== undefined && { + [OTEL_ATTRIBUTES.serverPort]: ctx.serverPort, + }), + }, + }, + })); + } + + // --------------------------------------------------------------------------- + // Helpers + // --------------------------------------------------------------------------- + + #endSpanWithError(span: Span, error: Error) { + const { SpanStatusCode } = this.#api; + const errorInfo = getErrorInfo(error); + + span.recordException(error); + span.setAttribute(OTEL_ATTRIBUTES.errorType, errorInfo.errorType); + if (errorInfo.statusCode !== undefined) { + span.setAttribute(OTEL_ATTRIBUTES.dbResponseStatusCode, errorInfo.statusCode); + } + span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); + span.end(); + } + + #isCommandExcluded(commandName: string) { + const name = commandName?.toUpperCase() ?? ''; + return ( + (this.#options.hasIncludeCommands && !this.#options.includeCommands[name]) || + this.#options.excludeCommands[name] + ); + } +} diff --git a/packages/client/lib/opentelemetry/types.ts b/packages/client/lib/opentelemetry/types.ts index 08f63e8102..63ab9f0006 100644 --- a/packages/client/lib/opentelemetry/types.ts +++ b/packages/client/lib/opentelemetry/types.ts @@ -4,6 +4,7 @@ import { Histogram, MeterProvider, ObservableGauge, + TracerProvider, UpDownCounter, } from "@opentelemetry/api"; import { version } from "../../package.json"; @@ -43,8 +44,17 @@ export interface OTelClientAttributes { isPubSub?: boolean; } +export interface TracingConfig { + enabled?: boolean; + tracerProvider?: TracerProvider; + includeCommands?: string[]; + excludeCommands?: string[]; + enableConnectionSpans?: boolean; +} + export interface ObservabilityConfig { metrics?: MetricConfig; + tracing?: TracingConfig; } export interface MetricOptions extends Required< @@ -101,6 +111,7 @@ export const OTEL_ATTRIBUTES = { serverPort: "server.port", networkPeerAddress: "network.peer.address", networkPeerPort: "network.peer.port", + dbOperationBatchSize: "db.operation.batch.size", dbStoredProcedureName: "db.stored_procedure.name", dbClientConnectionPoolName: "db.client.connection.pool.name", dbClientConnectionState: "db.client.connection.state", diff --git a/packages/client/package.json b/packages/client/package.json index 68ff80f113..fc9f6c5d67 100644 --- a/packages/client/package.json +++ b/packages/client/package.json @@ -18,7 +18,9 @@ "devDependencies": { "@node-rs/xxhash": "1.7.6", "@opentelemetry/api": "^1.9.0", + "@opentelemetry/context-async-hooks": "^2.6.1", "@opentelemetry/sdk-metrics": "^2.2.0", + "@opentelemetry/sdk-trace-base": "^2.6.1", "@redis/test-utils": "*", "@types/sinon": "^17.0.3", "sinon": "^17.0.1" @@ -49,4 +51,4 @@ "keywords": [ "redis" ] -} \ No newline at end of file +}