Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion deployment/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { deployPostgres } from './services/postgres';
import { deployProxy } from './services/proxy';
import { deployPublicGraphQLAPIGateway } from './services/public-graphql-api-gateway';
import { deployRedis } from './services/redis';
import { deployRedpanda } from './services/redpanda';
import { deployS3, deployS3AuditLog, deployS3Mirror } from './services/s3';
import { deploySchema } from './services/schema';
import { configureSentry } from './services/sentry';
Expand Down Expand Up @@ -79,6 +80,7 @@ const clickhouse = deployClickhouse();
const postgres = deployPostgres();
const redis = deployRedis({ environment });
const kafka = deployKafka();
const redpanda = deployRedpanda({ environment });
const s3 = deployS3();
const s3Mirror = deployS3Mirror();
const s3AuditLog = deployS3AuditLog();
Expand Down Expand Up @@ -290,6 +292,7 @@ const otelCollector = deployOTELCollector({
graphql,
dbMigrations,
clickhouse,
redpanda,
image: docker.factory.getImageId('otel-collector', imagesTag),
docker,
});
Expand Down Expand Up @@ -350,6 +353,8 @@ export const schemaApiServiceId = schema.service.id;
export const webhooksApiServiceId = webhooks.service.id;

export const appId = app.deployment.id;
export const otelCollectorId = otelCollector.deployment.id;
export const otelCollectorIngressId = otelCollector.ingress.deployment.id;
export const otelCollectorEgressId = otelCollector.egress.deployment.id;
export const redpandaStatefulSetId = redpanda.statefulSet.id;
export const publicIp = proxy.get()!.status.loadBalancer.ingress[0].ip;
export const awsLambdaArtifactsFunctionUrl = lambdaFunction;
12 changes: 9 additions & 3 deletions deployment/services/environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,15 @@ export function prepareEnvironment(input: {
memoryLimit: isProduction ? '1000Mi' : '300Mi',
},
tracingCollector: {
cpuLimit: isProduction || isStaging ? '1000m' : '100m',
memoryLimit: isProduction || isStaging ? '1000Mi' : '512Mi',
maxReplicas: isProduction || isStaging ? 3 : 1,
cpuLimit: '500m',
memoryLimit: '512Mi',
maxReplicas: 3,
},
redpanda: {
replicas: 1,
cpuLimit: '500m',
memoryLimit: '1000Mi',
storageSize: '20Gi',
},
},
};
Expand Down
48 changes: 45 additions & 3 deletions deployment/services/otel-collector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { DbMigrations } from './db-migrations';
import { Docker } from './docker';
import { Environment } from './environment';
import { GraphQL } from './graphql';
import { Redpanda } from './redpanda';

export type OTELCollector = ReturnType<typeof deployOTELCollector>;

Expand All @@ -15,9 +16,12 @@ export function deployOTELCollector(args: {
clickhouse: Clickhouse;
dbMigrations: DbMigrations;
graphql: GraphQL;
redpanda: Redpanda;
}) {
return new ServiceDeployment(
'otel-collector',
const kafkaBroker = args.redpanda.brokerEndpoint;

const ingress = new ServiceDeployment(
'otel-collector-ingress',
{
image: args.image,
imagePullSecret: args.docker.secret,
Expand All @@ -26,6 +30,7 @@ export function deployOTELCollector(args: {
HIVE_OTEL_AUTH_ENDPOINT: serviceLocalEndpoint(args.graphql.service).apply(
value => value + '/otel-auth',
),
KAFKA_BROKER: kafkaBroker,
},
/**
* We are using the healthcheck extension.
Expand All @@ -44,7 +49,36 @@ export function deployOTELCollector(args: {
autoScaling: {
maxReplicas: args.environment.podsConfig.tracingCollector.maxReplicas,
cpu: {
limit: args.environment.podsConfig.tracingCollector.cpuLimit,
limit: '500m',
cpuAverageToScale: 80,
},
},
},
[args.dbMigrations],
).deploy();

// Egress: Redpanda -> ClickHouse
const egress = new ServiceDeployment(
'otel-collector-egress',
{
image: args.image,
imagePullSecret: args.docker.secret,
env: {
...args.environment.envVars,
KAFKA_BROKER: kafkaBroker,
},
probePort: 13133,
readinessProbe: '/',
livenessProbe: '/',
startupProbe: '/',
exposesMetrics: true,
replicas: args.environment.podsConfig.tracingCollector.maxReplicas,
pdb: true,
memoryLimit: args.environment.podsConfig.tracingCollector.memoryLimit,
autoScaling: {
maxReplicas: args.environment.podsConfig.tracingCollector.maxReplicas,
cpu: {
limit: '500m',
cpuAverageToScale: 80,
},
},
Expand All @@ -57,4 +91,12 @@ export function deployOTELCollector(args: {
.withSecret('CLICKHOUSE_PASSWORD', args.clickhouse.secret, 'password')
.withSecret('CLICKHOUSE_PROTOCOL', args.clickhouse.secret, 'protocol')
.deploy();

return {
ingress,
egress,
// For backward compatibility, expose ingress as the main deployment
deployment: ingress.deployment,
service: ingress.service,
};
}
194 changes: 194 additions & 0 deletions deployment/services/redpanda.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import * as k8s from '@pulumi/kubernetes';
import * as pulumi from '@pulumi/pulumi';
import { Environment } from './environment';

export type Redpanda = ReturnType<typeof deployRedpanda>;

/**
 * Deploys a Redpanda broker (Kafka-compatible) into the cluster.
 *
 * Creates:
 * - a StatefulSet running Redpanda with a persistent data volume,
 * - a headless Service (`redpanda`) that gives each pod a stable DNS name,
 * - a ClusterIP Service (`redpanda-client`) used by clients for bootstrap,
 * - a Job that creates the `otel-traces` topic once the cluster is healthy.
 *
 * @param args.environment - Environment configuration; `podsConfig.redpanda`
 *   supplies replicas, CPU/memory limits and the storage size.
 * @returns The created resources plus `brokerEndpoint`, the bootstrap address
 *   clients should connect to.
 */
export function deployRedpanda(args: { environment: Environment }) {
  const labels = { app: 'redpanda' };
  const redpandaConfig = args.environment.podsConfig.redpanda;

  // Single source of truth so the broker and the rpk job never drift apart.
  const image = 'redpandadata/redpanda:v25.3.1';
  // Name of the headless Service; it must match `spec.serviceName` of the StatefulSet.
  const serviceName = 'redpanda';
  const kafkaPort = 9092;
  const httpProxyPort = 8082;
  const rpcPort = 33145;
  const adminPort = 9644;
  // Documented Admin API readiness endpoint.
  const readyPath = '/v1/status/ready';

  // StatefulSet for Redpanda
  const statefulSet = new k8s.apps.v1.StatefulSet('redpanda', {
    metadata: {
      name: 'redpanda',
    },
    spec: {
      serviceName,
      replicas: redpandaConfig.replicas,
      selector: {
        matchLabels: labels,
      },
      template: {
        metadata: {
          labels,
        },
        spec: {
          containers: [
            {
              name: 'redpanda',
              image,
              resources: {
                limits: {
                  cpu: redpandaConfig.cpuLimit,
                  memory: redpandaConfig.memoryLimit,
                },
              },
              // Exposed through the downward API so they can be referenced from `args`.
              env: [
                {
                  name: 'POD_NAME',
                  valueFrom: { fieldRef: { fieldPath: 'metadata.name' } },
                },
                {
                  name: 'POD_NAMESPACE',
                  valueFrom: { fieldRef: { fieldPath: 'metadata.namespace' } },
                },
              ],
              args: [
                'redpanda',
                'start',
                '--smp',
                '1',
                '--kafka-addr',
                `PLAINTEXT://0.0.0.0:${kafkaPort}`,
                '--advertise-kafka-addr',
                // Kubernetes only substitutes `$(VAR)` inside container args; a shell-style
                // `${HOSTNAME}` would be handed to Redpanda verbatim and advertised to clients.
                // Using the pod's own namespace also avoids hard-coding `default`.
                pulumi.interpolate`PLAINTEXT://$(POD_NAME).${serviceName}.$(POD_NAMESPACE).svc.cluster.local:${kafkaPort}`,
              ],
              ports: [
                { containerPort: kafkaPort, name: 'kafka' },
                { containerPort: httpProxyPort, name: 'http' },
                { containerPort: rpcPort, name: 'rpc' },
                { containerPort: adminPort, name: 'admin' },
              ],
              volumeMounts: [
                {
                  name: 'datadir',
                  mountPath: '/var/lib/redpanda/data',
                },
              ],
              livenessProbe: {
                httpGet: {
                  path: readyPath,
                  port: adminPort,
                },
                initialDelaySeconds: 10,
                terminationGracePeriodSeconds: 60,
                periodSeconds: 10,
                failureThreshold: 5,
                timeoutSeconds: 5,
              },
              readinessProbe: {
                httpGet: {
                  path: readyPath,
                  port: adminPort,
                },
                initialDelaySeconds: 10,
                periodSeconds: 15,
                failureThreshold: 5,
                timeoutSeconds: 5,
              },
            },
          ],
        },
      },
      volumeClaimTemplates: [
        {
          metadata: {
            name: 'datadir',
          },
          spec: {
            accessModes: ['ReadWriteOnce'],
            resources: {
              requests: {
                storage: redpandaConfig.storageSize,
              },
            },
          },
        },
      ],
    },
  });

  // Headless Service for StatefulSet (used for internal cluster communication)
  const headlessService = new k8s.core.v1.Service('redpanda-headless', {
    metadata: {
      name: serviceName,
    },
    spec: {
      clusterIP: 'None',
      selector: labels,
      ports: [
        { name: 'kafka', port: kafkaPort, targetPort: kafkaPort },
        { name: 'http', port: httpProxyPort, targetPort: httpProxyPort },
        { name: 'rpc', port: rpcPort, targetPort: rpcPort },
        { name: 'admin', port: adminPort, targetPort: adminPort },
      ],
    },
  });

  // ClusterIP Service for clients (load balances across all pods)
  const clientService = new k8s.core.v1.Service('redpanda-client-service', {
    metadata: {
      name: 'redpanda-client',
    },
    spec: {
      type: 'ClusterIP',
      selector: labels,
      ports: [
        { name: 'kafka', port: kafkaPort, targetPort: kafkaPort },
        { name: 'http', port: httpProxyPort, targetPort: httpProxyPort },
      ],
    },
  });

  // Create otel-traces topic
  const topicCreationJob = new k8s.batch.v1.Job(
    'redpanda-topic-creation',
    {
      metadata: {
        name: 'redpanda-topic-creation',
      },
      spec: {
        template: {
          spec: {
            // A failing script makes the pod restart, so real errors are retried
            // instead of being reported as a successful Job.
            restartPolicy: 'OnFailure',
            containers: [
              {
                name: 'rpk',
                image,
                imagePullPolicy: 'Always',
                command: [
                  '/bin/bash',
                  '-c',
                  `
set -eu

BROKERS=redpanda-0.redpanda:9092

# Wait for Redpanda to be ready. Match "Healthy: true" explicitly, because an
# unhealthy cluster also prints a "Healthy:" line.
ready=0
for i in {1..60}; do
  if rpk cluster health --brokers "$BROKERS" 2>/dev/null | grep -Eq 'Healthy:[[:space:]]+true'; then
    echo "Redpanda cluster is ready"
    ready=1
    break
  fi
  echo "Waiting for Redpanda cluster... ($i/60)"
  sleep 5
done

if [ "$ready" -ne 1 ]; then
  echo "Redpanda cluster did not become healthy in time" >&2
  exit 1
fi

# Create topic with partitioning only (no replication).
# Only an "already exists" failure is tolerated; anything else fails the job.
if create_output=$(rpk topic create otel-traces \\
  --brokers "$BROKERS" \\
  --replicas 1 \\
  --partitions 10 \\
  --config retention.ms=2592000000 \\
  --config compression.type=snappy \\
  --config max.message.bytes=10485760 2>&1); then
  echo "$create_output"
elif echo "$create_output" | grep -q 'TOPIC_ALREADY_EXISTS'; then
  echo "Topic already exists"
else
  echo "$create_output" >&2
  exit 1
fi

# Verify topic creation
rpk topic describe otel-traces --brokers "$BROKERS"
`,
                ],
              },
            ],
          },
        },
      },
    },
    { dependsOn: [statefulSet, headlessService] },
  );

  return {
    statefulSet,
    headlessService,
    clientService,
    topicCreationJob,
    // Client service endpoint - used for bootstrap; brokers are then reached via
    // the per-pod address each broker advertises.
    brokerEndpoint: 'redpanda-client:9092',
  };
}
21 changes: 21 additions & 0 deletions docker/configs/otel-collector/builder-config-egress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Builder configuration for the custom OpenTelemetry Collector "egress" distribution.
# It bundles only the components the egress collector needs: a Kafka receiver,
# a batch processor, the debug and ClickHouse exporters, and the health check extension.
# All component modules are pinned to the same version as the distribution.
dist:
version: 0.140.0
name: otelcol-custom
description: Custom OTel Collector distribution
output_path: ./otelcol-custom

# Receives data from Kafka-compatible brokers.
receivers:
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.140.0

# Batches data before it is handed to the exporters.
processors:
- gomod: go.opentelemetry.io/collector/processor/batchprocessor v0.140.0

# debugexporter is for troubleshooting output; clickhouseexporter writes to ClickHouse.
exporters:
- gomod: go.opentelemetry.io/collector/exporter/debugexporter v0.140.0
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/exporter/clickhouseexporter v0.140.0

# Health check endpoint extension (used by the deployment's HTTP probes).
extensions:
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension
v0.140.0
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ processors:

exporters:
- gomod: go.opentelemetry.io/collector/exporter/debugexporter v0.140.0
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/exporter/clickhouseexporter v0.140.0
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/exporter/kafkaexporter v0.140.0

extensions:
- gomod:
Expand Down
Loading
Loading