Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.local.example
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ MILVUS_ADDRESS=
MILVUS_TOKEN=
# Analytics
NEXT_PUBLIC_POSTHOG_KEY=
# PostHog personal API key — enables local evaluation of feature flags (no per-request API call).
# Obtain from https://us.posthog.com/settings/user-api-keys
# When set, REDIS_URL should also be configured to share flag definitions across server processes.
POSTHOG_PERSONAL_API_KEY=
# Sentry (error tracking)
SENTRY_ORG=
SENTRY_PROJECT=
Expand Down
2 changes: 1 addition & 1 deletion apps/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
"openai": "6.29.0",
"p-limit": "catalog:",
"posthog-js": "1.360.2",
"posthog-node": "5.10.4",
"posthog-node": "5.34.2",
"react": "19.2.6",
"react-countup": "6.5.3",
"react-dom": "19.2.6",
Expand Down
66 changes: 66 additions & 0 deletions apps/web/src/lib/posthog-flag-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/**
* Redis-backed cache for PostHog feature flag definitions.
*
* In a multi-instance deployment (e.g. multiple Next.js server processes), we want
* only ONE instance to poll PostHog for flag updates while all instances share the
* cached results, avoiding redundant API calls.
*
* This uses a distributed lock pattern:
* - One instance acquires the lock and fetches flag definitions
* - Others skip fetching and read from the shared Redis cache
* - The lock is released after storing data so the next poll cycle re-elects
* - If the fetching instance crashes, the lock TTL ensures another takes over
*
* When Redis is unavailable, all instances fall through to PostHog API calls
* (the same behaviour as before this cache was added).
*
* Reference: https://posthog.com/docs/feature-flags/local-evaluation/distributed-environments
*/

import type {
FlagDefinitionCacheProvider,
FlagDefinitionCacheData,
} from 'posthog-node/experimental';
import { redisGet, redisSet, redisSetNX, redisDel } from '@/lib/redis';
import {
POSTHOG_FLAG_DEFINITIONS_REDIS_KEY,
POSTHOG_FLAG_CACHE_LOCK_REDIS_KEY,
} from '@/lib/redis-keys';

/** Expiry, in seconds, given to the lock key when it is created via redisSetNX. */
const LOCK_TTL_SECONDS = 90; // longer than the default 30s polling interval
/** Expiry, in seconds, given to the cached flag definitions when they are written via redisSet. */
const CACHE_TTL_SECONDS = 60 * 60 * 24; // 24 hours; flags are also held in memory

export function createPostHogFlagCache(): FlagDefinitionCacheProvider {
return {
/**
 * Reads the shared flag definitions from Redis.
 *
 * @returns The parsed definitions, or `undefined` when nothing is cached or
 * the cached payload is not valid JSON (treated as a cache miss).
 */
async getFlagDefinitions(): Promise<FlagDefinitionCacheData | undefined> {
  const cached = await redisGet(POSTHOG_FLAG_DEFINITIONS_REDIS_KEY);
  if (!cached) return undefined;
  try {
    // The payload is whatever onFlagDefinitionsReceived serialised; its shape
    // is trusted, not validated.
    return JSON.parse(cached) as FlagDefinitionCacheData;
  } catch {
    // A truncated or otherwise corrupt entry should behave like an empty
    // cache rather than throw a SyntaxError at the caller. The entry is
    // replaced the next time onFlagDefinitionsReceived writes this key.
    return undefined;
  }
},

/**
 * Decides whether this instance should fetch flag definitions by attempting
 * to create the shared lock key with an expiry.
 *
 * @returns `true` when the lock attempt threw (fail open), otherwise the
 * boolean that `redisSetNX` resolved to.
 */
async shouldFetchFlagDefinitions(): Promise<boolean> {
  try {
    const lockAcquired = await redisSetNX(
      POSTHOG_FLAG_CACHE_LOCK_REDIS_KEY,
      '1',
      LOCK_TTL_SECONDS
    );
    // NOTE(review): redisSetNX resolves `false` (it does not throw) when no
    // Redis client is configured, so this path also returns `false` in that
    // case. Confirm the SDK still fetches when the cache provider declines
    // and the cache is empty.
    return lockAcquired;
  } catch {
    // The lock attempt failed (e.g. Redis error or timeout): let this
    // instance fetch so flag evaluation is not broken.
    return true;
  }
},

async onFlagDefinitionsReceived(data: FlagDefinitionCacheData): Promise<void> {
await redisSet(POSTHOG_FLAG_DEFINITIONS_REDIS_KEY, JSON.stringify(data), CACHE_TTL_SECONDS);
// Release the lock so the next poll cycle can re-elect a leader.
await redisDel(POSTHOG_FLAG_CACHE_LOCK_REDIS_KEY);
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: redisDel errors propagate uncaught here, which may surface to the PostHog SDK's polling mechanism.

If redisSet at line 52 succeeds but redisDel throws (e.g. Redis timeout), the lock remains held for the full 90s TTL even though the data was already written to Redis. All other instances will wait up to 90s before the next poll cycle can elect a new leader. This is a latency issue: flag definitions will be stale until the lock expires.

Consider wrapping redisDel in a try/catch here (similar to how it's wrapped in shutdown()) so errors are silently absorbed rather than surfacing to the SDK:

try {
  await redisDel(POSTHOG_FLAG_CACHE_LOCK_REDIS_KEY);
} catch {
  // Lock TTL will expire it; not critical since data was already written
}

},

/**
 * Releases the lock key on graceful shutdown.
 *
 * Best effort only: a failed delete is deliberately ignored and never
 * rejects the returned promise.
 */
async shutdown(): Promise<void> {
  await redisDel(POSTHOG_FLAG_CACHE_LOCK_REDIS_KEY).catch(() => undefined);
},
};
}
33 changes: 24 additions & 9 deletions apps/web/src/lib/posthog.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { getEnvVariable } from '@/lib/dotenvx';
import { PostHog } from 'posthog-node';
import type { FlagDefinitionCacheProvider } from 'posthog-node/experimental';
import { createPostHogFlagCache } from '@/lib/posthog-flag-cache';

let instance: PostHog | null = null;
let flagCache: FlagDefinitionCacheProvider | null = null;

export default function PostHogClient(): Pick<
PostHog,
Expand All @@ -21,28 +24,40 @@ export default function PostHogClient(): Pick<
alias: () => {},
};
}

// Local evaluation requires a personal API key. When present, flag checks are
// evaluated in-process against cached flag definitions instead of making a
// network call to PostHog on every request.
const personalApiKey = process.env.POSTHOG_PERSONAL_API_KEY;

if (personalApiKey && !flagCache) {
// Create the Redis-backed cache for sharing flag definitions across processes.
// Falls back gracefully to per-instance in-memory state when Redis is absent.
flagCache = createPostHogFlagCache();
}

// Single shared PostHog client for the process.
// Disabled outside production to avoid sending real events during tests/dev.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SUGGESTION: Stale comment — disabled is no longer passed to the PostHog constructor.

The original code passed disabled: !isProduction to the PostHog constructor. The new code relies on the early return at lines 17-26 to prevent instantiation in non-production. The comment now implies that a disabled flag is still being set, which is misleading.

Suggested change
// Disabled outside production to avoid sending real events during tests/dev.
// Single shared PostHog client for the process — only created in production.

instance = new PostHog(isProduction ? key : key || 'disabled', {
instance = new PostHog(key, {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: PostHog is instantiated with an empty string key (key = '') if NEXT_PUBLIC_POSTHOG_KEY is unset.

The original code used isProduction ? key : key || 'disabled' to fall back to 'disabled' and explicitly passed disabled: !isProduction. Now, the code path only reaches this line when isProduction is true, so passing an empty-string key to PostHog in production (when the env var is missing) will result in silent failures on every event capture. Previously, the disabled option would have made this a no-op.

Consider an early guard:

if (!key) {
  // misconfigured — return stub to avoid silent event loss
  return { capture: () => {}, isFeatureEnabled: async () => false, getFeatureFlag: async () => undefined, debug: () => {}, getFeatureFlagPayload: async () => undefined, alias: () => {} };
}

host: 'https://us.i.posthog.com',
flushAt: 1,
flushInterval: 0,
disabled: !isProduction,
...(personalApiKey
? {
personalApiKey,
enableLocalEvaluation: true,
flagDefinitionCacheProvider: flagCache ?? undefined,
}
: {}),
});

// if (!isProduction) {
// // eslint-disable-next-line @typescript-eslint/no-explicit-any
// (instance as any).capture = function (...args: any[]) {
// console.log('POSTHOG CAPTURE', ...args);
// };
// }

return instance;
}

/**
 * Shuts down the process-wide PostHog client, if one has been created, and
 * clears the module-level client and flag-cache references.
 *
 * Does nothing when no client exists. If the client's `shutdown()` rejects,
 * the rejection propagates and the references are left in place.
 */
export async function shutdownPosthog(): Promise<void> {
  if (!instance) {
    return;
  }

  await instance.shutdown();
  instance = null;
  flagCache = null;
}
6 changes: 6 additions & 0 deletions apps/web/src/lib/redis-keys.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,9 @@ export const modelExperimentRedisKey = (publicId: string) =>

/** Builds the Redis key under which GitLab OAuth credentials for `credentialRef` are addressed. */
export const gitLabOAuthCredentialsRedisKey = (credentialRef: string) =>
redisKey(`auth-credentials:gitlab:${credentialRef}`);

/** Cached PostHog feature flag definitions for local evaluation (JSON blob). */
export const POSTHOG_FLAG_DEFINITIONS_REDIS_KEY = redisKey('posthog:flags:definitions');

/** Distributed lock key used to elect a single leader for polling PostHog flag definitions. */
export const POSTHOG_FLAG_CACHE_LOCK_REDIS_KEY = redisKey('posthog:flags:lock');
25 changes: 25 additions & 0 deletions apps/web/src/lib/redis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,31 @@ export async function redisSet(
}
}

/**
 * Set-if-absent with expiry: writes `value` under `key` only when the key
 * does not already exist (SET with the NX and EX options).
 *
 * @param key - Redis key to create.
 * @param value - Value stored when the key is created.
 * @param ttlSeconds - Expiry for the new key, in seconds (passed as EX).
 * @returns `true` when Redis replied `OK` (key created, i.e. lock acquired);
 * `false` when the key already existed, or when Redis is not configured
 * (REDIS_URL unset).
 * @throws Rethrows any connect/command failure (including timeouts) after
 * reporting it through `captureException`.
 */
export async function redisSetNX(
  key: RedisKey,
  value: string,
  ttlSeconds: number
): Promise<boolean> {
  const client = getOrCreateClient();
  if (!client) {
    return false;
  }

  try {
    await withTimeout(ensureConnected(client), CONNECT_TIMEOUT_MS);
    const pendingSet = client.set(key, value, { NX: true, EX: ttlSeconds });
    const reply = await withTimeout(pendingSet, COMMAND_TIMEOUT_MS);
    return reply === 'OK';
  } catch (err) {
    captureException(err, { tags: { service: 'redis', operation: 'setnx' }, extra: { key } });
    throw err;
  }
}

/** Returns false if Redis is not configured (REDIS_URL unset). */
export async function redisDel(key: RedisKey): Promise<boolean> {
const c = getOrCreateClient();
Expand Down
41 changes: 28 additions & 13 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.