From 6bb49974c0e15c94bdb75d3a058c0c760ad2ca13 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Thu, 4 Dec 2025 19:51:54 +0530 Subject: [PATCH 001/221] build openpanel image ci --- .github/workflows/docker-build.yml | 192 +++++------------------------ 1 file changed, 32 insertions(+), 160 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 9717b531d..7ec28a88a 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -3,19 +3,16 @@ name: Docker Build and Push on: workflow_dispatch: push: - # branches: [ "main" ] + branches: [ "main" ] paths: - "apps/api/**" - "apps/worker/**" - - "apps/public/**" + - "apps/start/**" - "packages/**" - "!packages/sdks/**" - "**Dockerfile" - ".github/workflows/**" -env: - repo_owner: "openpanel-dev" - jobs: changes: runs-on: ubuntu-latest @@ -48,89 +45,28 @@ jobs: - 'packages/**' - '.github/workflows/**' - lint-and-test: - needs: changes - if: ${{ needs.changes.outputs.api == 'true' || needs.changes.outputs.worker == 'true' || needs.changes.outputs.public == 'true' || needs.changes.outputs.dashboard == 'true' }} - runs-on: ubuntu-latest - services: - redis: - image: redis:7-alpine - ports: - - 6379:6379 - options: >- - --health-cmd "redis-cli ping || exit 1" - --health-interval 5s - --health-timeout 3s - --health-retries 20 - steps: - - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: "20" - - - name: Install pnpm - uses: pnpm/action-setup@v4 - - - name: Get pnpm store directory - shell: bash - run: | - echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV - - - name: Setup pnpm cache - uses: actions/cache@v3 - with: - path: ${{ env.STORE_PATH }} - key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} - restore-keys: | - ${{ runner.os }}-pnpm-store- - - - name: Install dependencies - run: pnpm install - - - name: Codegen - run: pnpm codegen - - # - name: Run Biome - # 
run: pnpm lint - - # - name: Run TypeScript checks - # run: pnpm typecheck - - # - name: Run tests - # run: pnpm test - build-and-push-api: - permissions: - packages: write - contents: write - needs: [changes, lint-and-test] + needs: changes if: ${{ needs.changes.outputs.api == 'true' }} runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Generate tags - id: tags + - name: Generate short SHA + id: short-sha run: | - # Sanitize branch name by replacing / with - - BRANCH_NAME=$(echo "${{ github.ref_name }}" | sed 's/\//-/g') - # Get first 4 characters of commit SHA - SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-4) - echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT - echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT + echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Log in to GitHub Container Registry + - name: Login to Azure Container Registry uses: docker/login-action@v3 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + registry: ${{ secrets.AZURE_DASH_REGISTRY_URL }} + username: ${{ secrets.AZURE_DASH_REGISTRY_CLIENT_ID }} + password: ${{ secrets.AZURE_DASH_REGISTRY_CLIENT_SECRET }} - name: Build and push Docker image uses: docker/build-push-action@v6 @@ -141,57 +77,33 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max tags: | - ghcr.io/${{ env.repo_owner }}/api:${{ steps.tags.outputs.branch_name }}-${{ steps.tags.outputs.short_sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/api:${{ steps.short-sha.outputs.sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/api:latest build-args: | DATABASE_URL=postgresql://dummy:dummy@localhost:5432/dummy - - name: Create/Update API tag - if: github.ref == 'refs/heads/main' - run: | - # Delete existing tag if it exists - if git tag -l "api" | grep -q "api"; then - git tag -d "api" - echo "Deleted local tag: api" - fi - - # Create new tag - git tag 
"api" "${{ github.sha }}" - echo "Created tag: api" - - # Push tag to remote - git push origin "api" --force - echo "Pushed tag: api" - build-and-push-worker: - permissions: - packages: write - contents: write - needs: [changes, lint-and-test] + needs: changes if: ${{ needs.changes.outputs.worker == 'true' }} runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Generate tags - id: tags + - name: Generate short SHA + id: short-sha run: | - # Sanitize branch name by replacing / with - - BRANCH_NAME=$(echo "${{ github.ref_name }}" | sed 's/\//-/g') - # Get first 4 characters of commit SHA - SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-4) - echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT - echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT + echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Log in to GitHub Container Registry + - name: Login to Azure Container Registry uses: docker/login-action@v3 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + registry: ${{ secrets.AZURE_DASH_REGISTRY_URL }} + username: ${{ secrets.AZURE_DASH_REGISTRY_CLIENT_ID }} + password: ${{ secrets.AZURE_DASH_REGISTRY_CLIENT_SECRET }} - name: Build and push Docker image uses: docker/build-push-action@v6 @@ -202,57 +114,33 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max tags: | - ghcr.io/${{ env.repo_owner }}/worker:${{ steps.tags.outputs.branch_name }}-${{ steps.tags.outputs.short_sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/worker:${{ steps.short-sha.outputs.sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/worker:latest build-args: | DATABASE_URL=postgresql://dummy:dummy@localhost:5432/dummy - - name: Create/Update Worker tag - if: github.ref == 'refs/heads/main' - run: | - # Delete existing tag if it exists - if git tag -l "worker" | grep -q "worker"; then - git tag -d "worker" - echo "Deleted local 
tag: worker" - fi - - # Create new tag - git tag "worker" "${{ github.sha }}" - echo "Created tag: worker" - - # Push tag to remote - git push origin "worker" --force - echo "Pushed tag: worker" - build-and-push-dashboard: - permissions: - packages: write - contents: write - needs: [changes, lint-and-test] + needs: changes if: ${{ needs.changes.outputs.dashboard == 'true' }} runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Generate tags - id: tags + - name: Generate short SHA + id: short-sha run: | - # Sanitize branch name by replacing / with - - BRANCH_NAME=$(echo "${{ github.ref_name }}" | sed 's/\//-/g') - # Get first 4 characters of commit SHA - SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-4) - echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT - echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT + echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Log in to GitHub Container Registry + - name: Login to Azure Container Registry uses: docker/login-action@v3 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + registry: ${{ secrets.AZURE_DASH_REGISTRY_URL }} + username: ${{ secrets.AZURE_DASH_REGISTRY_CLIENT_ID }} + password: ${{ secrets.AZURE_DASH_REGISTRY_CLIENT_SECRET }} - name: Build and push Docker image uses: docker/build-push-action@v6 @@ -263,23 +151,7 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max tags: | - ghcr.io/${{ env.repo_owner }}/dashboard:${{ steps.tags.outputs.branch_name }}-${{ steps.tags.outputs.short_sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/dashboard:${{ steps.short-sha.outputs.sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/dashboard:latest build-args: | NO_CLOUDFLARE=1 - - - name: Create/Update Dashboard tag - if: github.ref == 'refs/heads/main' - run: | - # Delete existing tag if it exists - if git tag -l "dashboard" | grep -q "dashboard"; then - 
git tag -d "dashboard" - echo "Deleted local tag: dashboard" - fi - - # Create new tag - git tag "dashboard" "${{ github.sha }}" - echo "Created tag: dashboard" - - # Push tag to remote - git push origin "dashboard" --force - echo "Pushed tag: dashboard" From 604baa50af89866d11b20b36a52bd26b219a76f8 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Fri, 5 Dec 2025 18:57:09 +0530 Subject: [PATCH 002/221] update image tags --- .github/workflows/docker-build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 7ec28a88a..9f2d2407a 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -77,8 +77,8 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max tags: | - ${{ secrets.AZURE_DASH_REGISTRY_URL }}/api:${{ steps.short-sha.outputs.sha }} - ${{ secrets.AZURE_DASH_REGISTRY_URL }}/api:latest + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/openpanel-api:${{ steps.short-sha.outputs.sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/openpanel-api:latest build-args: | DATABASE_URL=postgresql://dummy:dummy@localhost:5432/dummy @@ -114,8 +114,8 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max tags: | - ${{ secrets.AZURE_DASH_REGISTRY_URL }}/worker:${{ steps.short-sha.outputs.sha }} - ${{ secrets.AZURE_DASH_REGISTRY_URL }}/worker:latest + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/openpanel-worker:${{ steps.short-sha.outputs.sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/openpanel-worker:latest build-args: | DATABASE_URL=postgresql://dummy:dummy@localhost:5432/dummy @@ -151,7 +151,7 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max tags: | - ${{ secrets.AZURE_DASH_REGISTRY_URL }}/dashboard:${{ steps.short-sha.outputs.sha }} - ${{ secrets.AZURE_DASH_REGISTRY_URL }}/dashboard:latest + ${{ secrets.AZURE_DASH_REGISTRY_URL }}/openpanel-dashboard:${{ steps.short-sha.outputs.sha }} + ${{ secrets.AZURE_DASH_REGISTRY_URL 
}}/openpanel-dashboard:latest build-args: | NO_CLOUDFLARE=1 From 05e8f85104c77a34728dc11aea5bde4ff3cb16d5 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Fri, 5 Dec 2025 19:08:21 +0530 Subject: [PATCH 003/221] add WHERE clause to fix conversion charts --- packages/db/src/services/conversion.service.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/db/src/services/conversion.service.ts b/packages/db/src/services/conversion.service.ts index f7e7b86b9..9b958a3c1 100644 --- a/packages/db/src/services/conversion.service.ts +++ b/packages/db/src/services/conversion.service.ts @@ -104,7 +104,7 @@ export class ConversionService { nullIf(min(b.b_time), '1970-01-01 00:00:00.000') AS conversion_time FROM event_a AS a LEFT JOIN event_b AS b ON a.${group} = b.${group} - AND b.b_time BETWEEN a.a_time AND a.a_time + INTERVAL ${funnelWindow} HOUR + WHERE b.b_time BETWEEN a.a_time AND a.a_time + INTERVAL ${funnelWindow} HOUR GROUP BY a.${group}, a.a_time, a.event_day${breakdownGroupBy.length ? `, ${breakdownGroupBy.join(', ')}` : ''}) `), ) From f79388e11343a680a5c77258dc883a064689f031 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Sat, 6 Dec 2025 14:15:43 +0530 Subject: [PATCH 004/221] fix conversion chart --- packages/db/src/services/conversion.service.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/db/src/services/conversion.service.ts b/packages/db/src/services/conversion.service.ts index 9b958a3c1..31c53734d 100644 --- a/packages/db/src/services/conversion.service.ts +++ b/packages/db/src/services/conversion.service.ts @@ -101,10 +101,9 @@ export class ConversionService { a.a_time, a.event_day, ${breakdownGroupBy.length ? 
`${breakdownGroupBy.join(', ')},` : ''} - nullIf(min(b.b_time), '1970-01-01 00:00:00.000') AS conversion_time + nullIf(min(CASE WHEN b.b_time BETWEEN a.a_time AND a.a_time + INTERVAL ${funnelWindow} HOUR THEN b.b_time END), '1970-01-01 00:00:00.000') AS conversion_time FROM event_a AS a LEFT JOIN event_b AS b ON a.${group} = b.${group} - WHERE b.b_time BETWEEN a.a_time AND a.a_time + INTERVAL ${funnelWindow} HOUR GROUP BY a.${group}, a.a_time, a.event_day${breakdownGroupBy.length ? `, ${breakdownGroupBy.join(', ')}` : ''}) `), ) From 1aa6144cbcb38edb884292e2b6dccb481d4182d9 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Sat, 6 Dec 2025 17:03:35 +0530 Subject: [PATCH 005/221] fix profile filter and funnel charts (#2) --- packages/db/src/services/chart.service.ts | 19 +++---------------- packages/trpc/src/routers/chart.ts | 8 ++++---- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index bc6432283..fe09dd2da 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -157,21 +157,8 @@ export function getChartSql({ if (anyFilterOnProfile || anyBreakdownOnProfile) { const profileFields = getProfileFields(); const selectFields = profileFields.map((field) => { - if (field === 'id') { - return 'id as "profile.id"'; - } - if (field === 'properties') { - return 'properties as "profile.properties"'; - } - if (field === 'email') { - return 'email as "profile.email"'; - } - if (field === 'first_name') { - return 'first_name as "profile.first_name"'; - } - if (field === 'last_name') { - return 'last_name as "profile.last_name"'; - } + // Keep original column names without aliases + // so they can be accessed as profile.properties, profile.email, etc. 
return field; }); @@ -179,7 +166,7 @@ export function getChartSql({ addCte( 'profile', `SELECT ${selectFields.join(', ')} - FROM ${TABLE_NAMES.profiles} FINAL + FROM ${TABLE_NAMES.profiles} FINAL WHERE project_id = ${sqlstring.escape(projectId)}`, ); diff --git a/packages/trpc/src/routers/chart.ts b/packages/trpc/src/routers/chart.ts index 0debd31fe..7e7053d0f 100644 --- a/packages/trpc/src/routers/chart.ts +++ b/packages/trpc/src/routers/chart.ts @@ -526,12 +526,12 @@ export const chartRouter = createTRPCRouter({ GROUP BY cohort_interval ) SELECT - cohort_interval, - cohort_sizes.total_first_event_count, + interval_users.cohort_interval, + cs.total_first_event_count, ${countsSelect} FROM interval_users - LEFT JOIN cohort_sizes AS cs ON cohort_interval = cs.cohort_interval - ORDER BY cohort_interval ASC + LEFT JOIN cohort_sizes AS cs ON interval_users.cohort_interval = cs.cohort_interval + ORDER BY interval_users.cohort_interval ASC `; const cohortData = await chQuery<{ From 1d1b1a1206c6b4fbc66e22d08170d75e9d34981e Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Sat, 6 Dec 2025 17:58:28 +0530 Subject: [PATCH 006/221] fix charts breakdown charts (#3) --- packages/db/src/services/chart.service.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index fe09dd2da..3b7b35659 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -314,13 +314,16 @@ export function getChartSql({ // Note: The profile CTE (if it exists) is available in subqueries, so we can reference it directly if (breakdowns.length > 0) { // Match breakdown properties in subquery with outer query's grouped values - // Since outer query groups by label_X, we reference those in the correlation + // Since outer query groups by label_X, we need to use the actual property expression + // in both the outer query and the subquery for correlation 
const breakdownMatches = breakdowns .map((b, index) => { const propertyKey = getSelectPropertyKey(b.name); - // Correlate: match the property expression with outer query's label_X value - // ClickHouse allows referencing outer query columns in correlated subqueries - return `${propertyKey} = label_${index + 1}`; + const outerPropertyKey = propertyKey.replace(/\be\./g, 'e.'); + const innerPropertyKey = propertyKey.replace(/\be\./g, 'e2.'); + // Correlate: match inner query's property with outer query's same property + // Reference the outer query's table using e. prefix + return `${innerPropertyKey} = ${outerPropertyKey}`; }) .join(' AND '); From d8a62c5a1019894198c9b5e87643e8ee0bb149fb Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 9 Dec 2025 02:38:23 +0530 Subject: [PATCH 007/221] expose profile created at timestamps for filtering (#4) --- packages/db/src/services/chart.service.ts | 9 +++++++-- packages/trpc/src/routers/profile.ts | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 3b7b35659..65199e368 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -38,6 +38,11 @@ export function getSelectPropertyKey(property: string) { return `if(profile_id != device_id, 'true', 'false')`; } + // Handle profile.created_at - it's stored as created_at in the profiles table + if (property === 'profile.created_at') { + return 'profile.created_at'; + } + const propertyPatterns = ['properties', 'profile.properties']; const match = propertyPatterns.find((pattern) => @@ -123,7 +128,7 @@ export function getChartSql({ fields.add('properties'); } else if ( fieldName && - ['email', 'first_name', 'last_name'].includes(fieldName) + ['email', 'first_name', 'last_name', 'created_at'].includes(fieldName) ) { fields.add(fieldName); } @@ -138,7 +143,7 @@ export function getChartSql({ fields.add('properties'); } else if ( 
fieldName && - ['email', 'first_name', 'last_name'].includes(fieldName) + ['email', 'first_name', 'last_name', 'created_at'].includes(fieldName) ) { fields.add(fieldName); } diff --git a/packages/trpc/src/routers/profile.ts b/packages/trpc/src/routers/profile.ts index 64159f493..b9c2dd627 100644 --- a/packages/trpc/src/routers/profile.ts +++ b/packages/trpc/src/routers/profile.ts @@ -65,7 +65,7 @@ export const profileRouter = createTRPCRouter({ .map((item) => item.replace(/\.([0-9]+)/g, '[*]')) .map((item) => `properties.${item}`); - properties.push('id', 'first_name', 'last_name', 'email'); + properties.push('id', 'first_name', 'last_name', 'email', 'created_at'); return pipe( sort((a, b) => a.length - b.length), From 8bd653b0a37bcd0c58f36a6bb93bee87678b322e Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Sun, 14 Dec 2025 00:29:11 +0530 Subject: [PATCH 008/221] remove event retention --- packages/queue/src/queues.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/queue/src/queues.ts b/packages/queue/src/queues.ts index e1e64645e..4811a9e9a 100644 --- a/packages/queue/src/queues.ts +++ b/packages/queue/src/queues.ts @@ -151,8 +151,8 @@ export const eventsGroupQueues = Array.from({ list.length === 1 ? 
'group_events' : `group_events_${index}`, ), redis: getRedisGroupQueue(), - keepCompleted: 1_000, - keepFailed: 10_000, + keepCompleted: 0, + keepFailed: 0, orderingDelayMs: orderingDelayMs, autoBatch: autoBatchMaxWaitMs && autoBatchSize From 8266bf76db46fd58feecafb4713d679d087bd843 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Sun, 14 Dec 2025 01:04:17 +0530 Subject: [PATCH 009/221] add group mq to api --- apps/api/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/api/package.json b/apps/api/package.json index 5e20851a2..5cfb082c7 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -38,6 +38,7 @@ "fastify": "^5.6.1", "fastify-metrics": "^12.1.0", "fastify-raw-body": "^5.0.0", + "groupmq": "1.1.1-next.2", "jsonwebtoken": "^9.0.2", "ramda": "^0.29.1", "sharp": "^0.33.5", From c8df7c6faa3df860624c627d71038fe32a3b5555 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Sun, 14 Dec 2025 01:08:58 +0530 Subject: [PATCH 010/221] add group mq to api --- pnpm-lock.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 206697141..d7def8329 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -189,6 +189,9 @@ importers: fastify-raw-body: specifier: ^5.0.0 version: 5.0.0 + groupmq: + specifier: 1.1.1-next.2 + version: 1.1.1-next.2(ioredis@5.8.2) jsonwebtoken: specifier: ^9.0.2 version: 9.0.2 From b4d3e4f06bfc05072a0f13fc528f36111d48f6be Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Sun, 14 Dec 2025 01:11:39 +0530 Subject: [PATCH 011/221] add group mq to api --- apps/api/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/package.json b/apps/api/package.json index 5cfb082c7..86ade5c98 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -65,4 +65,4 @@ "tsdown": "0.14.2", "typescript": "catalog:" } -} \ No newline at end of file +} From bf95bc1cd8cf30fca30e788d8e835adc5ba999a3 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Sun, 14 Dec 2025 
16:50:31 +0530 Subject: [PATCH 012/221] Fetch remote v1 (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: ability to paus buffer based on cron queue * fix: add groupmq back to api --------- Co-authored-by: Carl-Gerhard Lindesvärd --- apps/api/package.json | 1 + apps/worker/package.json | 2 +- packages/db/src/buffers/base-buffer.ts | 6 ++++++ packages/queue/package.json | 2 +- pnpm-lock.yaml | 10 +++++++--- pnpm-workspace.yaml | 1 + 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/apps/api/package.json b/apps/api/package.json index 86ade5c98..60ef38ae6 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -29,6 +29,7 @@ "@openpanel/logger": "workspace:*", "@openpanel/payments": "workspace:*", "@openpanel/queue": "workspace:*", + "groupmq": "catalog:", "@openpanel/redis": "workspace:*", "@openpanel/trpc": "workspace:*", "@openpanel/validation": "workspace:*", diff --git a/apps/worker/package.json b/apps/worker/package.json index ac7e87c36..c8ae019e3 100644 --- a/apps/worker/package.json +++ b/apps/worker/package.json @@ -24,7 +24,7 @@ "@openpanel/redis": "workspace:*", "bullmq": "^5.63.0", "express": "^4.18.2", - "groupmq": "1.1.1-next.2", + "groupmq": "catalog:", "prom-client": "^15.1.3", "ramda": "^0.29.1", "source-map-support": "^0.5.21", diff --git a/packages/db/src/buffers/base-buffer.ts b/packages/db/src/buffers/base-buffer.ts index 07211e1ab..cc8421004 100644 --- a/packages/db/src/buffers/base-buffer.ts +++ b/packages/db/src/buffers/base-buffer.ts @@ -1,5 +1,6 @@ import { generateSecureId } from '@openpanel/common/server'; import { type ILogger, createLogger } from '@openpanel/logger'; +import { cronQueue } from '@openpanel/queue'; import { getRedisCache, runEvery } from '@openpanel/redis'; export class BaseBuffer { @@ -94,6 +95,11 @@ export class BaseBuffer { async tryFlush() { const now = performance.now(); + const isCronQueuePaused = await cronQueue.isPaused(); + if 
(isCronQueuePaused) { + this.logger.info('Cron queue is paused, skipping flush'); + return; + } // Parallel mode: No locking, multiple workers can process simultaneously if (this.enableParallelProcessing) { diff --git a/packages/queue/package.json b/packages/queue/package.json index dfef3a0e9..a440e7bc5 100644 --- a/packages/queue/package.json +++ b/packages/queue/package.json @@ -11,7 +11,7 @@ "@openpanel/logger": "workspace:*", "@openpanel/redis": "workspace:*", "bullmq": "^5.63.0", - "groupmq": "1.1.1-next.2" + "groupmq": "catalog:" }, "devDependencies": { "@openpanel/sdk": "workspace:*", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d7def8329..1c227c1b3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15,6 +15,9 @@ catalogs: '@types/react-dom': specifier: ^19.0.0 version: 19.1.8 + groupmq: + specifier: 1.1.1-next.2 + version: 1.1.1-next.2 react: specifier: ^19.0.0 version: 19.1.1 @@ -190,7 +193,7 @@ importers: specifier: ^5.0.0 version: 5.0.0 groupmq: - specifier: 1.1.1-next.2 + specifier: 'catalog:' version: 1.1.1-next.2(ioredis@5.8.2) jsonwebtoken: specifier: ^9.0.2 @@ -889,7 +892,7 @@ importers: specifier: ^4.18.2 version: 4.18.2 groupmq: - specifier: 1.1.1-next.2 + specifier: 'catalog:' version: 1.1.1-next.2(ioredis@5.8.2) prom-client: specifier: ^15.1.3 @@ -1356,7 +1359,7 @@ importers: specifier: ^5.63.0 version: 5.63.0 groupmq: - specifier: 1.1.1-next.2 + specifier: 'catalog:' version: 1.1.1-next.2(ioredis@5.8.2) devDependencies: '@openpanel/sdk': @@ -13188,6 +13191,7 @@ packages: next@15.0.3: resolution: {integrity: sha512-ontCbCRKJUIoivAdGB34yCaOcPgYXr9AAkV/IwqFfWWTXEPUgLYkSkqBhIk9KK7gGmgjc64B+RdoeIDM13Irnw==} engines: {node: ^18.18.0 || ^19.8.0 || >= 20.0.0} + deprecated: This version has a security vulnerability. Please upgrade to a patched version. See https://nextjs.org/blog/CVE-2025-66478 for more details. 
hasBin: true peerDependencies: '@opentelemetry/api': ^1.1.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 30fcbb896..49c3cf43a 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -13,3 +13,4 @@ catalog: "@types/react-dom": ^19.0.0 "@types/node": ^24.7.1 typescript: ^5.9.3 + groupmq: 1.1.1-next.2 From 98703f68875a11de434b2379eed39ebc6ea036be Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Mon, 15 Dec 2025 14:02:32 +0530 Subject: [PATCH 013/221] enable shard distribution (#6) --- apps/worker/src/boot-workers.ts | 76 ++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/apps/worker/src/boot-workers.ts b/apps/worker/src/boot-workers.ts index bcce6a802..a83a41726 100644 --- a/apps/worker/src/boot-workers.ts +++ b/apps/worker/src/boot-workers.ts @@ -64,6 +64,76 @@ function getEnabledQueues(): QueueName[] { return queues; } +function getPodIndex(): number { + const hostname = process.env.HOSTNAME || ''; + + // Extract numeric index from StatefulSet pod name (e.g., openpanel-worker-0 -> 0) + const match = hostname.match(/-(\d+)$/); + if (match) { + return Number.parseInt(match[1], 10); + } + + // Fallback to 0 if no numeric suffix found + logger.warn('Could not extract pod index from hostname, defaulting to 0', { + hostname, + }); + return 0; +} + +function getAutoPartitionedQueues(): QueueName[] { + const enabledQueues = getEnabledQueues(); + + const enableShardDistribution = + process.env.ENABLE_SHARD_DISTRIBUTION === 'true'; + + if (!enableShardDistribution) { + return enabledQueues; + } + + if (!enabledQueues.includes('events')) { + return enabledQueues; + } + + const totalPods = Number.parseInt(process.env.TOTAL_POD || '1', 10); + + if (totalPods <= 1) { + logger.info('Shard distribution disabled: TOTAL_POD not set or = 1', { + totalShards: EVENTS_GROUP_QUEUES_SHARDS, + }); + return enabledQueues; + } + + const podIndex = getPodIndex(); + const shardsPerPod = Math.floor(EVENTS_GROUP_QUEUES_SHARDS / 
totalPods); + const remainderShards = EVENTS_GROUP_QUEUES_SHARDS % totalPods; + const extraShard = podIndex < remainderShards ? 1 : 0; + const startShard = + podIndex * shardsPerPod + Math.min(podIndex, remainderShards); + const endShard = startShard + shardsPerPod + extraShard; + + const specificShards = Array.from( + { length: endShard - startShard }, + (_, i) => `events_${startShard + i}`, + ); + + const partitionedQueues = [ + ...enabledQueues.filter((q) => q !== 'events'), + ...specificShards, + ]; + + logger.info('Shard distribution enabled', { + hostname: process.env.HOSTNAME, + podIndex, + totalPods, + totalShards: EVENTS_GROUP_QUEUES_SHARDS, + assignedShards: `${startShard}-${endShard - 1}`, + shardsCount: endShard - startShard, + queues: partitionedQueues, + }); + + return partitionedQueues; +} + /** * Gets the concurrency setting for a queue from environment variables. * Env var format: {QUEUE_NAME}_CONCURRENCY (e.g., EVENTS_0_CONCURRENCY=32) @@ -83,7 +153,11 @@ function getConcurrencyFor(queueName: string, defaultValue = 1): number { } export async function bootWorkers() { - const enabledQueues = getEnabledQueues(); + const enableShardDistribution = + process.env.ENABLE_SHARD_DISTRIBUTION === 'true'; + const enabledQueues = enableShardDistribution + ? 
getAutoPartitionedQueues() + : getEnabledQueues(); const workers: (Worker | GroupWorker)[] = []; From 1ea7ffcad6bae20de8f99fc438a17347abbd7935 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Mon, 22 Dec 2025 22:31:02 +0530 Subject: [PATCH 014/221] add limits in breakdown charts (#7) --- apps/start/src/components/report/reportSlice.ts | 1 + packages/validation/src/index.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/apps/start/src/components/report/reportSlice.ts b/apps/start/src/components/report/reportSlice.ts index 9ec30e32b..19e10bb22 100644 --- a/apps/start/src/components/report/reportSlice.ts +++ b/apps/start/src/components/report/reportSlice.ts @@ -78,6 +78,7 @@ export const reportSlice = createSlice({ return { ...state, ...action.payload, + limit: action.payload.limit ?? state.limit, startDate: null, endDate: null, dirty: false, diff --git a/packages/validation/src/index.ts b/packages/validation/src/index.ts index bc8777ee2..0cc009a03 100644 --- a/packages/validation/src/index.ts +++ b/packages/validation/src/index.ts @@ -182,6 +182,7 @@ export const zChartInputBase = z.object({ limit: z .number() .optional() + .default(500) .describe('Limit how many series should be returned'), offset: z .number() From c9649d3852bed6d5e1e3e78e5f356ac5f955953f Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Mon, 22 Dec 2025 22:43:00 +0530 Subject: [PATCH 015/221] fix api build (#8) --- apps/api/package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/api/package.json b/apps/api/package.json index 60ef38ae6..f15240e51 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -39,7 +39,6 @@ "fastify": "^5.6.1", "fastify-metrics": "^12.1.0", "fastify-raw-body": "^5.0.0", - "groupmq": "1.1.1-next.2", "jsonwebtoken": "^9.0.2", "ramda": "^0.29.1", "sharp": "^0.33.5", From 654104ecbb52bcdf60fbe1361f1f40fe9980a109 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Mon, 22 Dec 2025 23:31:53 +0530 Subject: [PATCH 016/221] optimise breakdown queries (#9) 
--- packages/db/src/services/chart.service.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 99b2ee453..724251614 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -303,10 +303,12 @@ export function getChartSql({ const breakdownMatches = breakdowns .map((b, index) => { const propertyKey = getSelectPropertyKey(b.name); - const outerPropertyKey = propertyKey.replace(/\be\./g, 'e.'); - const innerPropertyKey = propertyKey.replace(/\be\./g, 'e2.'); - // Correlate: match inner query's property with outer query's same property - // Reference the outer query's table using e. prefix + const outerPropertyKey = propertyKey.startsWith('e.') + ? propertyKey + : `e.${propertyKey}`; + const innerPropertyKey = propertyKey.startsWith('e.') + ? propertyKey.replace(/^e\./, 'e2.') + : `e2.${propertyKey}`; return `${innerPropertyKey} = ${outerPropertyKey}`; }) .join(' AND '); From 6d3e86e3fcb01ff8508baf4eee5bbb2394203051 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 23 Dec 2025 01:09:20 +0530 Subject: [PATCH 017/221] revert breakdown queries, migrated to clickhouse 25.3 (#10) --- packages/db/src/services/chart.service.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 724251614..6d2936479 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -298,18 +298,13 @@ export function getChartSql({ // Note: The profile CTE (if it exists) is available in subqueries, so we can reference it directly if (breakdowns.length > 0) { // Match breakdown properties in subquery with outer query's grouped values - // Since outer query groups by label_X, we need to use the actual property expression - // in both the outer query and the subquery for 
correlation + // Since outer query groups by label_X, we reference those in the correlation const breakdownMatches = breakdowns .map((b, index) => { const propertyKey = getSelectPropertyKey(b.name); - const outerPropertyKey = propertyKey.startsWith('e.') - ? propertyKey - : `e.${propertyKey}`; - const innerPropertyKey = propertyKey.startsWith('e.') - ? propertyKey.replace(/^e\./, 'e2.') - : `e2.${propertyKey}`; - return `${innerPropertyKey} = ${outerPropertyKey}`; + // Correlate: match the property expression with outer query's label_X value + // ClickHouse allows referencing outer query columns in correlated subqueries + return `${propertyKey} = label_${index + 1}`; }) .join(' AND '); From 35b49e0f8bd70d40f8a8d38fcd3b772db97ce513 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 23 Dec 2025 14:15:05 +0530 Subject: [PATCH 018/221] reduce breakdown query limit from 500 to 20 (#11) --- apps/start/src/components/report/reportSlice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/start/src/components/report/reportSlice.ts b/apps/start/src/components/report/reportSlice.ts index 19e10bb22..faa18da2a 100644 --- a/apps/start/src/components/report/reportSlice.ts +++ b/apps/start/src/components/report/reportSlice.ts @@ -49,7 +49,7 @@ const initialState: InitialState = { formula: undefined, unit: undefined, metric: 'sum', - limit: 500, + limit: 20, criteria: 'on_or_after', funnelGroup: undefined, funnelWindow: undefined, From dcacd1eb348d917ab741a6de98c9e44231ed0309 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 23 Dec 2025 14:43:50 +0530 Subject: [PATCH 019/221] reduce breakdown query limit from 500 to 20 (#12) --- packages/validation/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/validation/src/index.ts b/packages/validation/src/index.ts index 0cc009a03..1ea0d9391 100644 --- a/packages/validation/src/index.ts +++ b/packages/validation/src/index.ts @@ -182,7 +182,7 @@ export const 
zChartInputBase = z.object({ limit: z .number() .optional() - .default(500) + .default(20) .describe('Limit how many series should be returned'), offset: z .number() From a001d9b429ce3d7fdc8798a15cff0bd0d4802dd2 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 23 Dec 2025 16:49:06 +0530 Subject: [PATCH 020/221] remove stale interval, make it infinite to reduce load on db (#13) --- apps/start/src/components/overview/filters/origin-filter.tsx | 4 +--- apps/start/src/components/overview/overview-metrics.tsx | 4 +--- apps/start/src/components/report-chart/area/chart.tsx | 4 +--- apps/start/src/components/report-chart/area/index.tsx | 1 - apps/start/src/components/report-chart/bar/index.tsx | 1 - apps/start/src/components/report-chart/conversion/chart.tsx | 4 +--- apps/start/src/components/report-chart/conversion/index.tsx | 1 - apps/start/src/components/report-chart/histogram/chart.tsx | 4 +--- apps/start/src/components/report-chart/histogram/index.tsx | 1 - apps/start/src/components/report-chart/line/chart.tsx | 4 +--- apps/start/src/components/report-chart/line/index.tsx | 1 - apps/start/src/components/report-chart/map/index.tsx | 1 - apps/start/src/components/report-chart/metric/index.tsx | 1 - apps/start/src/components/report-chart/pie/index.tsx | 1 - apps/start/src/components/report-chart/retention/index.tsx | 1 - apps/start/src/hooks/use-event-names.ts | 1 - apps/start/src/integrations/tanstack-query/root-provider.tsx | 2 +- 17 files changed, 7 insertions(+), 29 deletions(-) diff --git a/apps/start/src/components/overview/filters/origin-filter.tsx b/apps/start/src/components/overview/filters/origin-filter.tsx index a1251985e..9f0b3d411 100644 --- a/apps/start/src/components/overview/filters/origin-filter.tsx +++ b/apps/start/src/components/overview/filters/origin-filter.tsx @@ -17,9 +17,7 @@ export function OriginFilter() { { projectId: projectId, }, - { - staleTime: 1000 * 60 * 60, - }, + {}, ), ); diff --git 
a/apps/start/src/components/overview/overview-metrics.tsx b/apps/start/src/components/overview/overview-metrics.tsx index b9be57393..72e7d5877 100644 --- a/apps/start/src/components/overview/overview-metrics.tsx +++ b/apps/start/src/components/overview/overview-metrics.tsx @@ -286,9 +286,7 @@ function Chart({ endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); diff --git a/apps/start/src/components/report-chart/area/chart.tsx b/apps/start/src/components/report-chart/area/chart.tsx index 872ed19b3..56abfbe4b 100644 --- a/apps/start/src/components/report-chart/area/chart.tsx +++ b/apps/start/src/components/report-chart/area/chart.tsx @@ -65,9 +65,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/area/index.tsx b/apps/start/src/components/report-chart/area/index.tsx index c8d3f74a4..50ed57233 100644 --- a/apps/start/src/components/report-chart/area/index.tsx +++ b/apps/start/src/components/report-chart/area/index.tsx @@ -15,7 +15,6 @@ export function ReportAreaChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/bar/index.tsx b/apps/start/src/components/report-chart/bar/index.tsx index 28f2779b9..71e1fca2f 100644 --- a/apps/start/src/components/report-chart/bar/index.tsx +++ b/apps/start/src/components/report-chart/bar/index.tsx @@ -14,7 +14,6 @@ export function ReportBarChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/conversion/chart.tsx b/apps/start/src/components/report-chart/conversion/chart.tsx index 0506fb928..0fde61f8c 100644 --- 
a/apps/start/src/components/report-chart/conversion/chart.tsx +++ b/apps/start/src/components/report-chart/conversion/chart.tsx @@ -55,9 +55,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); diff --git a/apps/start/src/components/report-chart/conversion/index.tsx b/apps/start/src/components/report-chart/conversion/index.tsx index fc75527b5..979b99d1c 100644 --- a/apps/start/src/components/report-chart/conversion/index.tsx +++ b/apps/start/src/components/report-chart/conversion/index.tsx @@ -17,7 +17,6 @@ export function ReportConversionChart() { const res = useQuery( trpc.chart.conversion.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/histogram/chart.tsx b/apps/start/src/components/report-chart/histogram/chart.tsx index 529ab1a28..731f07c04 100644 --- a/apps/start/src/components/report-chart/histogram/chart.tsx +++ b/apps/start/src/components/report-chart/histogram/chart.tsx @@ -73,9 +73,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/histogram/index.tsx b/apps/start/src/components/report-chart/histogram/index.tsx index 1f6d01468..f366d3cca 100644 --- a/apps/start/src/components/report-chart/histogram/index.tsx +++ b/apps/start/src/components/report-chart/histogram/index.tsx @@ -15,7 +15,6 @@ export function ReportHistogramChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/line/chart.tsx b/apps/start/src/components/report-chart/line/chart.tsx index d546258de..7de8e739c 100644 --- a/apps/start/src/components/report-chart/line/chart.tsx +++ 
b/apps/start/src/components/report-chart/line/chart.tsx @@ -65,9 +65,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/line/index.tsx b/apps/start/src/components/report-chart/line/index.tsx index 5c11c5d78..b93509fd3 100644 --- a/apps/start/src/components/report-chart/line/index.tsx +++ b/apps/start/src/components/report-chart/line/index.tsx @@ -16,7 +16,6 @@ export function ReportLineChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/map/index.tsx b/apps/start/src/components/report-chart/map/index.tsx index d6ca11c74..68cb2ef80 100644 --- a/apps/start/src/components/report-chart/map/index.tsx +++ b/apps/start/src/components/report-chart/map/index.tsx @@ -15,7 +15,6 @@ export function ReportMapChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/metric/index.tsx b/apps/start/src/components/report-chart/metric/index.tsx index ade245d03..7b1902cfa 100644 --- a/apps/start/src/components/report-chart/metric/index.tsx +++ b/apps/start/src/components/report-chart/metric/index.tsx @@ -11,7 +11,6 @@ export function ReportMetricChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/pie/index.tsx b/apps/start/src/components/report-chart/pie/index.tsx index 58f7edcc7..9feedbfb2 100644 --- a/apps/start/src/components/report-chart/pie/index.tsx +++ b/apps/start/src/components/report-chart/pie/index.tsx @@ -15,7 
+15,6 @@ export function ReportPieChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/retention/index.tsx b/apps/start/src/components/report-chart/retention/index.tsx index 58bbffff4..e57524ca4 100644 --- a/apps/start/src/components/report-chart/retention/index.tsx +++ b/apps/start/src/components/report-chart/retention/index.tsx @@ -42,7 +42,6 @@ export function ReportRetentionChart() { }, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: isEnabled, }, ), diff --git a/apps/start/src/hooks/use-event-names.ts b/apps/start/src/hooks/use-event-names.ts index c81d54f5c..fee68496d 100644 --- a/apps/start/src/hooks/use-event-names.ts +++ b/apps/start/src/hooks/use-event-names.ts @@ -6,7 +6,6 @@ export function useEventNames(params: any) { const query = useQuery( trpc.chart.events.queryOptions(params, { enabled: !!params.projectId, - staleTime: 1000 * 60 * 10, }), ); return query.data ?? []; diff --git a/apps/start/src/integrations/tanstack-query/root-provider.tsx b/apps/start/src/integrations/tanstack-query/root-provider.tsx index 50ee2ac85..d632f0114 100644 --- a/apps/start/src/integrations/tanstack-query/root-provider.tsx +++ b/apps/start/src/integrations/tanstack-query/root-provider.tsx @@ -41,7 +41,7 @@ export function getContext(apiUrl: string) { const queryClient = new QueryClient({ defaultOptions: { queries: { - staleTime: 1000 * 60 * 5, + staleTime: Infinity, gcTime: 1000 * 60 * 10, refetchOnReconnect: false, }, From f16d2ca416ec23828a073def4faea24077a050c5 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 23 Dec 2025 19:26:28 +0530 Subject: [PATCH 021/221] Revert "remove stale interval, make it infinite to reduce load on db (#13)" (#14) This reverts commit a001d9b429ce3d7fdc8798a15cff0bd0d4802dd2. 
--- apps/start/src/components/overview/filters/origin-filter.tsx | 4 +++- apps/start/src/components/overview/overview-metrics.tsx | 4 +++- apps/start/src/components/report-chart/area/chart.tsx | 4 +++- apps/start/src/components/report-chart/area/index.tsx | 1 + apps/start/src/components/report-chart/bar/index.tsx | 1 + apps/start/src/components/report-chart/conversion/chart.tsx | 4 +++- apps/start/src/components/report-chart/conversion/index.tsx | 1 + apps/start/src/components/report-chart/histogram/chart.tsx | 4 +++- apps/start/src/components/report-chart/histogram/index.tsx | 1 + apps/start/src/components/report-chart/line/chart.tsx | 4 +++- apps/start/src/components/report-chart/line/index.tsx | 1 + apps/start/src/components/report-chart/map/index.tsx | 1 + apps/start/src/components/report-chart/metric/index.tsx | 1 + apps/start/src/components/report-chart/pie/index.tsx | 1 + apps/start/src/components/report-chart/retention/index.tsx | 1 + apps/start/src/hooks/use-event-names.ts | 1 + apps/start/src/integrations/tanstack-query/root-provider.tsx | 2 +- 17 files changed, 29 insertions(+), 7 deletions(-) diff --git a/apps/start/src/components/overview/filters/origin-filter.tsx b/apps/start/src/components/overview/filters/origin-filter.tsx index 9f0b3d411..a1251985e 100644 --- a/apps/start/src/components/overview/filters/origin-filter.tsx +++ b/apps/start/src/components/overview/filters/origin-filter.tsx @@ -17,7 +17,9 @@ export function OriginFilter() { { projectId: projectId, }, - {}, + { + staleTime: 1000 * 60 * 60, + }, ), ); diff --git a/apps/start/src/components/overview/overview-metrics.tsx b/apps/start/src/components/overview/overview-metrics.tsx index 72e7d5877..b9be57393 100644 --- a/apps/start/src/components/overview/overview-metrics.tsx +++ b/apps/start/src/components/overview/overview-metrics.tsx @@ -286,7 +286,9 @@ function Chart({ endDate, range, }, - {}, + { + staleTime: 1000 * 60 * 10, + }, ), ); diff --git 
a/apps/start/src/components/report-chart/area/chart.tsx b/apps/start/src/components/report-chart/area/chart.tsx index 56abfbe4b..872ed19b3 100644 --- a/apps/start/src/components/report-chart/area/chart.tsx +++ b/apps/start/src/components/report-chart/area/chart.tsx @@ -65,7 +65,9 @@ export function Chart({ data }: Props) { endDate, range, }, - {}, + { + staleTime: 1000 * 60 * 10, + }, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/area/index.tsx b/apps/start/src/components/report-chart/area/index.tsx index 50ed57233..c8d3f74a4 100644 --- a/apps/start/src/components/report-chart/area/index.tsx +++ b/apps/start/src/components/report-chart/area/index.tsx @@ -15,6 +15,7 @@ export function ReportAreaChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/bar/index.tsx b/apps/start/src/components/report-chart/bar/index.tsx index 71e1fca2f..28f2779b9 100644 --- a/apps/start/src/components/report-chart/bar/index.tsx +++ b/apps/start/src/components/report-chart/bar/index.tsx @@ -14,6 +14,7 @@ export function ReportBarChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/conversion/chart.tsx b/apps/start/src/components/report-chart/conversion/chart.tsx index 0fde61f8c..0506fb928 100644 --- a/apps/start/src/components/report-chart/conversion/chart.tsx +++ b/apps/start/src/components/report-chart/conversion/chart.tsx @@ -55,7 +55,9 @@ export function Chart({ data }: Props) { endDate, range, }, - {}, + { + staleTime: 1000 * 60 * 10, + }, ), ); diff --git a/apps/start/src/components/report-chart/conversion/index.tsx b/apps/start/src/components/report-chart/conversion/index.tsx index 
979b99d1c..fc75527b5 100644 --- a/apps/start/src/components/report-chart/conversion/index.tsx +++ b/apps/start/src/components/report-chart/conversion/index.tsx @@ -17,6 +17,7 @@ export function ReportConversionChart() { const res = useQuery( trpc.chart.conversion.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/histogram/chart.tsx b/apps/start/src/components/report-chart/histogram/chart.tsx index 731f07c04..529ab1a28 100644 --- a/apps/start/src/components/report-chart/histogram/chart.tsx +++ b/apps/start/src/components/report-chart/histogram/chart.tsx @@ -73,7 +73,9 @@ export function Chart({ data }: Props) { endDate, range, }, - {}, + { + staleTime: 1000 * 60 * 10, + }, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/histogram/index.tsx b/apps/start/src/components/report-chart/histogram/index.tsx index f366d3cca..1f6d01468 100644 --- a/apps/start/src/components/report-chart/histogram/index.tsx +++ b/apps/start/src/components/report-chart/histogram/index.tsx @@ -15,6 +15,7 @@ export function ReportHistogramChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/line/chart.tsx b/apps/start/src/components/report-chart/line/chart.tsx index 7de8e739c..d546258de 100644 --- a/apps/start/src/components/report-chart/line/chart.tsx +++ b/apps/start/src/components/report-chart/line/chart.tsx @@ -65,7 +65,9 @@ export function Chart({ data }: Props) { endDate, range, }, - {}, + { + staleTime: 1000 * 60 * 10, + }, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/line/index.tsx b/apps/start/src/components/report-chart/line/index.tsx index b93509fd3..5c11c5d78 
100644 --- a/apps/start/src/components/report-chart/line/index.tsx +++ b/apps/start/src/components/report-chart/line/index.tsx @@ -16,6 +16,7 @@ export function ReportLineChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/map/index.tsx b/apps/start/src/components/report-chart/map/index.tsx index 68cb2ef80..d6ca11c74 100644 --- a/apps/start/src/components/report-chart/map/index.tsx +++ b/apps/start/src/components/report-chart/map/index.tsx @@ -15,6 +15,7 @@ export function ReportMapChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/metric/index.tsx b/apps/start/src/components/report-chart/metric/index.tsx index 7b1902cfa..ade245d03 100644 --- a/apps/start/src/components/report-chart/metric/index.tsx +++ b/apps/start/src/components/report-chart/metric/index.tsx @@ -11,6 +11,7 @@ export function ReportMetricChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/pie/index.tsx b/apps/start/src/components/report-chart/pie/index.tsx index 9feedbfb2..58f7edcc7 100644 --- a/apps/start/src/components/report-chart/pie/index.tsx +++ b/apps/start/src/components/report-chart/pie/index.tsx @@ -15,6 +15,7 @@ export function ReportPieChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/retention/index.tsx b/apps/start/src/components/report-chart/retention/index.tsx index e57524ca4..58bbffff4 100644 --- 
a/apps/start/src/components/report-chart/retention/index.tsx +++ b/apps/start/src/components/report-chart/retention/index.tsx @@ -42,6 +42,7 @@ export function ReportRetentionChart() { }, { placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 1, enabled: isEnabled, }, ), diff --git a/apps/start/src/hooks/use-event-names.ts b/apps/start/src/hooks/use-event-names.ts index fee68496d..c81d54f5c 100644 --- a/apps/start/src/hooks/use-event-names.ts +++ b/apps/start/src/hooks/use-event-names.ts @@ -6,6 +6,7 @@ export function useEventNames(params: any) { const query = useQuery( trpc.chart.events.queryOptions(params, { enabled: !!params.projectId, + staleTime: 1000 * 60 * 10, }), ); return query.data ?? []; diff --git a/apps/start/src/integrations/tanstack-query/root-provider.tsx b/apps/start/src/integrations/tanstack-query/root-provider.tsx index d632f0114..50ee2ac85 100644 --- a/apps/start/src/integrations/tanstack-query/root-provider.tsx +++ b/apps/start/src/integrations/tanstack-query/root-provider.tsx @@ -41,7 +41,7 @@ export function getContext(apiUrl: string) { const queryClient = new QueryClient({ defaultOptions: { queries: { - staleTime: Infinity, + staleTime: 1000 * 60 * 5, gcTime: 1000 * 60 * 10, refetchOnReconnect: false, }, From d091ed02104084d12bc8910a66b80c3b5507c483 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 23 Dec 2025 19:33:18 +0530 Subject: [PATCH 022/221] Revert "Revert "remove stale interval, make it infinite to reduce load on db"" (#15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "Revert "remove stale interval, make it infinite to reduce load on db …" This reverts commit f16d2ca416ec23828a073def4faea24077a050c5. 
* change staleTime --- .../components/overview/filters/origin-filter.tsx | 4 +--- .../src/components/overview/overview-metrics.tsx | 4 +--- .../src/components/report-chart/area/chart.tsx | 4 +--- .../src/components/report-chart/area/index.tsx | 1 - .../start/src/components/report-chart/bar/index.tsx | 1 - .../components/report-chart/conversion/chart.tsx | 4 +--- .../components/report-chart/conversion/index.tsx | 1 - .../src/components/report-chart/histogram/chart.tsx | 4 +--- .../src/components/report-chart/histogram/index.tsx | 1 - .../src/components/report-chart/line/chart.tsx | 4 +--- .../src/components/report-chart/line/index.tsx | 1 - .../start/src/components/report-chart/map/index.tsx | 1 - .../src/components/report-chart/metric/index.tsx | 1 - .../start/src/components/report-chart/pie/index.tsx | 1 - .../src/components/report-chart/retention/index.tsx | 1 - apps/start/src/hooks/use-event-names.ts | 1 - .../integrations/tanstack-query/root-provider.tsx | 13 +++++++++++-- 17 files changed, 17 insertions(+), 30 deletions(-) diff --git a/apps/start/src/components/overview/filters/origin-filter.tsx b/apps/start/src/components/overview/filters/origin-filter.tsx index a1251985e..9f0b3d411 100644 --- a/apps/start/src/components/overview/filters/origin-filter.tsx +++ b/apps/start/src/components/overview/filters/origin-filter.tsx @@ -17,9 +17,7 @@ export function OriginFilter() { { projectId: projectId, }, - { - staleTime: 1000 * 60 * 60, - }, + {}, ), ); diff --git a/apps/start/src/components/overview/overview-metrics.tsx b/apps/start/src/components/overview/overview-metrics.tsx index b9be57393..72e7d5877 100644 --- a/apps/start/src/components/overview/overview-metrics.tsx +++ b/apps/start/src/components/overview/overview-metrics.tsx @@ -286,9 +286,7 @@ function Chart({ endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); diff --git a/apps/start/src/components/report-chart/area/chart.tsx b/apps/start/src/components/report-chart/area/chart.tsx index 
872ed19b3..56abfbe4b 100644 --- a/apps/start/src/components/report-chart/area/chart.tsx +++ b/apps/start/src/components/report-chart/area/chart.tsx @@ -65,9 +65,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/area/index.tsx b/apps/start/src/components/report-chart/area/index.tsx index c8d3f74a4..50ed57233 100644 --- a/apps/start/src/components/report-chart/area/index.tsx +++ b/apps/start/src/components/report-chart/area/index.tsx @@ -15,7 +15,6 @@ export function ReportAreaChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/bar/index.tsx b/apps/start/src/components/report-chart/bar/index.tsx index 28f2779b9..71e1fca2f 100644 --- a/apps/start/src/components/report-chart/bar/index.tsx +++ b/apps/start/src/components/report-chart/bar/index.tsx @@ -14,7 +14,6 @@ export function ReportBarChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/conversion/chart.tsx b/apps/start/src/components/report-chart/conversion/chart.tsx index 0506fb928..0fde61f8c 100644 --- a/apps/start/src/components/report-chart/conversion/chart.tsx +++ b/apps/start/src/components/report-chart/conversion/chart.tsx @@ -55,9 +55,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); diff --git a/apps/start/src/components/report-chart/conversion/index.tsx b/apps/start/src/components/report-chart/conversion/index.tsx index fc75527b5..979b99d1c 100644 --- a/apps/start/src/components/report-chart/conversion/index.tsx +++ 
b/apps/start/src/components/report-chart/conversion/index.tsx @@ -17,7 +17,6 @@ export function ReportConversionChart() { const res = useQuery( trpc.chart.conversion.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/histogram/chart.tsx b/apps/start/src/components/report-chart/histogram/chart.tsx index 529ab1a28..731f07c04 100644 --- a/apps/start/src/components/report-chart/histogram/chart.tsx +++ b/apps/start/src/components/report-chart/histogram/chart.tsx @@ -73,9 +73,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/histogram/index.tsx b/apps/start/src/components/report-chart/histogram/index.tsx index 1f6d01468..f366d3cca 100644 --- a/apps/start/src/components/report-chart/histogram/index.tsx +++ b/apps/start/src/components/report-chart/histogram/index.tsx @@ -15,7 +15,6 @@ export function ReportHistogramChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/line/chart.tsx b/apps/start/src/components/report-chart/line/chart.tsx index d546258de..7de8e739c 100644 --- a/apps/start/src/components/report-chart/line/chart.tsx +++ b/apps/start/src/components/report-chart/line/chart.tsx @@ -65,9 +65,7 @@ export function Chart({ data }: Props) { endDate, range, }, - { - staleTime: 1000 * 60 * 10, - }, + {}, ), ); const { series, setVisibleSeries } = useVisibleSeries(data); diff --git a/apps/start/src/components/report-chart/line/index.tsx b/apps/start/src/components/report-chart/line/index.tsx index 5c11c5d78..b93509fd3 100644 --- a/apps/start/src/components/report-chart/line/index.tsx +++ 
b/apps/start/src/components/report-chart/line/index.tsx @@ -16,7 +16,6 @@ export function ReportLineChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/map/index.tsx b/apps/start/src/components/report-chart/map/index.tsx index d6ca11c74..68cb2ef80 100644 --- a/apps/start/src/components/report-chart/map/index.tsx +++ b/apps/start/src/components/report-chart/map/index.tsx @@ -15,7 +15,6 @@ export function ReportMapChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/metric/index.tsx b/apps/start/src/components/report-chart/metric/index.tsx index ade245d03..7b1902cfa 100644 --- a/apps/start/src/components/report-chart/metric/index.tsx +++ b/apps/start/src/components/report-chart/metric/index.tsx @@ -11,7 +11,6 @@ export function ReportMetricChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/pie/index.tsx b/apps/start/src/components/report-chart/pie/index.tsx index 58f7edcc7..9feedbfb2 100644 --- a/apps/start/src/components/report-chart/pie/index.tsx +++ b/apps/start/src/components/report-chart/pie/index.tsx @@ -15,7 +15,6 @@ export function ReportPieChart() { const res = useQuery( trpc.chart.chart.queryOptions(report, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: !isLazyLoading, }), ); diff --git a/apps/start/src/components/report-chart/retention/index.tsx b/apps/start/src/components/report-chart/retention/index.tsx index 58bbffff4..e57524ca4 100644 --- a/apps/start/src/components/report-chart/retention/index.tsx +++ 
b/apps/start/src/components/report-chart/retention/index.tsx @@ -42,7 +42,6 @@ export function ReportRetentionChart() { }, { placeholderData: keepPreviousData, - staleTime: 1000 * 60 * 1, enabled: isEnabled, }, ), diff --git a/apps/start/src/hooks/use-event-names.ts b/apps/start/src/hooks/use-event-names.ts index c81d54f5c..fee68496d 100644 --- a/apps/start/src/hooks/use-event-names.ts +++ b/apps/start/src/hooks/use-event-names.ts @@ -6,7 +6,6 @@ export function useEventNames(params: any) { const query = useQuery( trpc.chart.events.queryOptions(params, { enabled: !!params.projectId, - staleTime: 1000 * 60 * 10, }), ); return query.data ?? []; diff --git a/apps/start/src/integrations/tanstack-query/root-provider.tsx b/apps/start/src/integrations/tanstack-query/root-provider.tsx index 50ee2ac85..d8a2a7019 100644 --- a/apps/start/src/integrations/tanstack-query/root-provider.tsx +++ b/apps/start/src/integrations/tanstack-query/root-provider.tsx @@ -41,9 +41,18 @@ export function getContext(apiUrl: string) { const queryClient = new QueryClient({ defaultOptions: { queries: { - staleTime: 1000 * 60 * 5, - gcTime: 1000 * 60 * 10, + // Cache data for 1 hour before considering it stale + staleTime: 1000 * 60 * 60, + // Keep unused data in cache for 2 hours + gcTime: 1000 * 60 * 120, + // Don't refetch on reconnect (reduces unnecessary queries) refetchOnReconnect: false, + // Don't refetch on window focus (reduces unnecessary queries) + refetchOnWindowFocus: false, + // Don't refetch on mount if data is fresh (reduces unnecessary queries) + refetchOnMount: false, + // Retry failed queries only once + retry: 1, }, dehydrate: { serializeData: superjson.serialize }, hydrate: { deserializeData: superjson.deserialize }, From 91211875089c8a67cc3635fae1232769d243ffad Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 24 Dec 2025 13:38:06 +0530 Subject: [PATCH 023/221] add first seen event (#17) --- packages/db/src/buffers/profile-buffer.ts | 63 +++++++++++++++++++++++ 1 file 
changed, 63 insertions(+) diff --git a/packages/db/src/buffers/profile-buffer.ts b/packages/db/src/buffers/profile-buffer.ts index fc82937f1..eb402cc17 100644 --- a/packages/db/src/buffers/profile-buffer.ts +++ b/packages/db/src/buffers/profile-buffer.ts @@ -6,6 +6,7 @@ import shallowEqual from 'fast-deep-equal'; import { omit } from 'ramda'; import { TABLE_NAMES, ch, chQuery } from '../clickhouse/client'; import type { IClickhouseProfile } from '../services/profile.service'; +import { createEvent } from '../services/event.service'; import { BaseBuffer } from './base-buffer'; export class ProfileBuffer extends BaseBuffer { @@ -68,6 +69,20 @@ export class ProfileBuffer extends BaseBuffer { const existingProfile = await this.fetchProfile(profile, logger); + // Create _firstSeen event for new profiles + if (!existingProfile) { + logger.info('New profile detected, creating _firstSeen event', { + profileId: profile.id, + projectId: profile.project_id, + }); + await this.createFirstSeenEvent(profile).catch((error) => { + logger.error('Failed to create _firstSeen event', { + error, + profileId: profile.id, + }); + }); + } + // Delete any properties that are not server related if we have a non-server profile if ( existingProfile?.properties.device !== 'server' && @@ -254,4 +269,52 @@ export class ProfileBuffer extends BaseBuffer { async getBufferSize() { return this.getBufferSizeWithCounter(() => this.redis.llen(this.redisKey)); } + + /** + * Creates a _firstSeen event when a new profile is detected + * This event only fires once per user in their entire journey + * The underscore prefix marks it as a system-generated event + */ + private async createFirstSeenEvent(profile: IClickhouseProfile) { + const createdAt = new Date(profile.created_at); + + await createEvent({ + name: '_firstSeen', + deviceId: profile.id, + profileId: profile.id, + projectId: profile.project_id, + sessionId: '', // No session for system events + properties: { + isExternal: profile.is_external, + 
source: 'system', + // Include initial profile properties + ...profile.properties, + }, + createdAt, + path: profile.properties.path || '', + origin: profile.properties.origin || '', + country: profile.properties.country || '', + city: profile.properties.city || '', + region: profile.properties.region || '', + longitude: profile.properties.longitude + ? Number(profile.properties.longitude) + : undefined, + latitude: profile.properties.latitude + ? Number(profile.properties.latitude) + : undefined, + os: profile.properties.os || '', + osVersion: profile.properties.os_version || '', + browser: profile.properties.browser || '', + browserVersion: profile.properties.browser_version || '', + device: profile.properties.device || '', + brand: profile.properties.brand || '', + model: profile.properties.model || '', + referrer: profile.properties.referrer || '', + referrerName: profile.properties.referrer_name || '', + referrerType: profile.properties.referrer_type || '', + duration: 0, + sdkName: '', + sdkVersion: '', + }); + } } From 8ef971afae90a3afa027147efa380283f35d9241 Mon Sep 17 00:00:00 2001 From: ayushjhanwar-png Date: Wed, 24 Dec 2025 18:58:58 +0530 Subject: [PATCH 024/221] Increase BullMQ timeout for import worker to prevent job stalling (#18) - Added lockDuration: 600000ms (10 minutes) to import worker - Added stalledInterval: 300000ms (5 minutes) for stall checks - Fixes "job stalled more than allowable limit" errors during long ClickHouse operations - Only affects import worker, other workers unchanged The import job's Phase 2 (ALTER TABLE UPDATE for session_id generation) takes 30+ minutes to complete for large batches, but BullMQ's default 30-second timeout was killing jobs prematurely. This conservative 10-minute timeout allows the job to complete without false stall detection. 
--- apps/worker/src/boot-workers.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/worker/src/boot-workers.ts b/apps/worker/src/boot-workers.ts index f65a538bb..9ac1c3305 100644 --- a/apps/worker/src/boot-workers.ts +++ b/apps/worker/src/boot-workers.ts @@ -256,9 +256,15 @@ export async function bootWorkers() { const importWorker = new Worker(importQueue.name, importJob, { ...workerOptions, concurrency, + lockDuration: 600000, // 10 minutes - prevents job stalling during long ClickHouse operations + stalledInterval: 300000, // 5 minutes - check for stalled jobs }); workers.push(importWorker); - logger.info('Started worker for import', { concurrency }); + logger.info('Started worker for import', { + concurrency, + lockDuration: 600000, + stalledInterval: 300000, + }); } if (workers.length === 0) { From 6881b72c44b32c0ff3df3aeef8f9084deaebc7e2 Mon Sep 17 00:00:00 2001 From: ayushjhanwar-png Date: Wed, 24 Dec 2025 20:07:21 +0530 Subject: [PATCH 025/221] Increase timeout worker (#19) --- apps/worker/src/boot-workers.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/worker/src/boot-workers.ts b/apps/worker/src/boot-workers.ts index 9ac1c3305..583db85a3 100644 --- a/apps/worker/src/boot-workers.ts +++ b/apps/worker/src/boot-workers.ts @@ -256,14 +256,14 @@ export async function bootWorkers() { const importWorker = new Worker(importQueue.name, importJob, { ...workerOptions, concurrency, - lockDuration: 600000, // 10 minutes - prevents job stalling during long ClickHouse operations - stalledInterval: 300000, // 5 minutes - check for stalled jobs + lockDuration: 3600000, // 1 hour - prevents job stalling during long ClickHouse operations + stalledInterval: 1800000, // 30 minutes - check for stalled jobs }); workers.push(importWorker); logger.info('Started worker for import', { concurrency, - lockDuration: 600000, - stalledInterval: 300000, + lockDuration: 3600000, + stalledInterval: 1800000, }); } From 
4e7c781d39d25cb95e55a73ff0114f5f395dae2b Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Fri, 26 Dec 2025 15:08:30 +0530 Subject: [PATCH 026/221] feat: add detailed logging to generateSessionIds for debugging Add console.log before/after ALTER TABLE to track: - When ALTER TABLE starts - Elapsed time (ms, sec, min) - Success/failure status - Error details if failed This helps debug import job hangs in Phase 2. --- packages/db/src/services/import.service.ts | 57 ++++++++++++++++++---- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/packages/db/src/services/import.service.ts b/packages/db/src/services/import.service.ts index 189d54d59..69970abdb 100644 --- a/packages/db/src/services/import.service.ts +++ b/packages/db/src/services/import.service.ts @@ -96,6 +96,17 @@ export async function generateSessionIds( importId: string, from: string, ): Promise { + console.log('ALTER TABLE session_id generation starting', { + importId, + from, + settings: { + mutations_sync: '2', + wait_end_of_query: 1, + }, + }); + + const startTime = Date.now(); + const rangeWhere = [ 'import_id = {importId:String}', "import_status = 'pending'", @@ -120,16 +131,42 @@ export async function generateSessionIds( WHERE ${rangeWhere} `; - await ch.command({ - query: updateQuery, - query_params: { importId, from }, - clickhouse_settings: { - wait_end_of_query: 1, - mutations_sync: '2', // Wait for mutation to complete on all replicas (critical!) - send_progress_in_http_headers: 1, - http_headers_progress_interval_ms: '50000', - }, - }); + try { + await ch.command({ + query: updateQuery, + query_params: { importId, from }, + clickhouse_settings: { + wait_end_of_query: 1, + mutations_sync: '2', // Wait for mutation to complete on all replicas (critical!) 
+ send_progress_in_http_headers: 1, + http_headers_progress_interval_ms: '50000', + }, + }); + + const elapsed = Date.now() - startTime; + + console.log('ALTER TABLE session_id generation completed', { + importId, + from, + elapsedMs: elapsed, + elapsedSec: Math.round(elapsed / 1000), + elapsedMin: (elapsed / 60000).toFixed(2), + status: 'success', + }); + } catch (error) { + const elapsed = Date.now() - startTime; + + console.error('ALTER TABLE session_id generation failed', { + importId, + from, + elapsedMs: elapsed, + elapsedSec: Math.round(elapsed / 1000), + error: (error as Error).message, + status: 'failed', + }); + + throw error; + } } /** From 9dd98e1d3efcbce4d570f8721264611c65a09951 Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Fri, 26 Dec 2025 18:34:44 +0530 Subject: [PATCH 027/221] Add detailed logging to import phases 3, 4, and 5 --- packages/db/src/services/import.service.ts | 33 ++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/packages/db/src/services/import.service.ts b/packages/db/src/services/import.service.ts index 69970abdb..672febf7d 100644 --- a/packages/db/src/services/import.service.ts +++ b/packages/db/src/services/import.service.ts @@ -179,6 +179,8 @@ export async function createSessionsStartEndEvents( importId: string, from: string, ): Promise { + console.log('[Phase 3] Starting createSessionsStartEndEvents', { importId, from }); + // First, let's identify session boundaries and get first/last events for each session const rangeWhere = [ 'import_id = {importId:String}', @@ -189,6 +191,8 @@ export async function createSessionsStartEndEvents( .filter(Boolean) .join(' AND '); + console.log('[Phase 3] WHERE clause:', rangeWhere); + // Use window functions to efficiently get first event (all fields) and last event (only changing fields) // session_end only needs: properties, path, origin, created_at - the rest can be inherited from session_start const sessionEventsQuery = ` @@ -207,6 +211,7 @@ export 
async function createSessionsStartEndEvents( GROUP BY session_id, device_id, project_id, profile_id `; + console.log('[Phase 3] Querying session boundaries...'); const sessionEventsResult = await ch.query({ query: sessionEventsQuery, query_params: { importId, from }, @@ -255,6 +260,8 @@ export async function createSessionsStartEndEvents( last_timestamp: string; }>; + console.log('[Phase 3] Found sessions:', sessionData.length); + // Create session_start and session_end events const sessionEvents: IClickhouseEvent[] = []; @@ -381,10 +388,22 @@ export async function createSessionsStartEndEvents( }); } + console.log('[Phase 3] Created session events:', { + total: sessionEvents.length, + sessionStarts: sessionEvents.filter(e => e.name === 'session_start').length, + sessionEnds: sessionEvents.filter(e => e.name === 'session_end').length, + }); + // Insert session events into imports table if (sessionEvents.length > 0) { + console.log('[Phase 3] Inserting session events...'); await insertImportBatch(sessionEvents, importId); + console.log('[Phase 3] Session events inserted successfully'); + } else { + console.log('[Phase 3] No session events to insert'); } + + console.log('[Phase 3] Completed createSessionsStartEndEvents'); } /** @@ -395,6 +414,8 @@ export async function moveImportsToProduction( importId: string, from: string, ): Promise { + console.log('[Phase 4] Starting moveImportsToProduction', { importId, from }); + // Build the WHERE clause for migration // For session events (session_start/session_end), we don't filter by their created_at // because they're created with adjusted timestamps (±1 second) that might fall outside @@ -407,16 +428,18 @@ export async function moveImportsToProduction( ( name IN ('session_start', 'session_end') AND session_id IN ( - SELECT DISTINCT session_id + SELECT DISTINCT session_id FROM ${TABLE_NAMES.events_imports} WHERE import_id = {importId:String} - AND toDate(created_at) = {from:String} + AND toDate(created_at) = {from:String} 
AND name NOT IN ('session_start', 'session_end') ) ) )`; } + console.log('[Phase 4] WHERE clause:', whereClause); + const migrationQuery = ` INSERT INTO ${TABLE_NAMES.events} ( id, @@ -484,6 +507,7 @@ export async function moveImportsToProduction( ORDER BY created_at ASC `; + console.log('[Phase 4] Executing migration query...'); await ch.command({ query: migrationQuery, query_params: { importId, from }, @@ -495,12 +519,15 @@ export async function moveImportsToProduction( http_headers_progress_interval_ms: '50000', }, }); + console.log('[Phase 4] Migration completed successfully'); } export async function backfillSessionsToProduction( importId: string, from: string, ): Promise { + console.log('[Phase 5] Starting backfillSessionsToProduction', { importId, from }); + // After migrating events, populate the sessions table based on the migrated sessions // We detect all session_ids involved in this import from the imports table, // then aggregate over the production events to construct session rows. 
@@ -608,6 +635,7 @@ export async function backfillSessionsToProduction( GROUP BY e.session_id `; + console.log('[Phase 5] Executing sessions backfill query...'); await ch.command({ query: sessionsInsertQuery, clickhouse_settings: { @@ -618,6 +646,7 @@ export async function backfillSessionsToProduction( http_headers_progress_interval_ms: '50000', }, }); + console.log('[Phase 5] Sessions backfill completed successfully'); } /** From d220cd445f9bc6966a0f48d17aaef34fe258fd3b Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Tue, 6 Jan 2026 10:52:40 +0530 Subject: [PATCH 028/221] Reduce timeout to 5 minutes for testing and log analysis - Changed lockDuration to 300000ms (5 minutes) - Changed stalledInterval to 150000ms (2.5 minutes) - This allows faster failure to analyze actual error logs - Will increase back to 1 hour once we identify the issue --- apps/worker/src/boot-workers.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/worker/src/boot-workers.ts b/apps/worker/src/boot-workers.ts index 583db85a3..42896e240 100644 --- a/apps/worker/src/boot-workers.ts +++ b/apps/worker/src/boot-workers.ts @@ -256,14 +256,14 @@ export async function bootWorkers() { const importWorker = new Worker(importQueue.name, importJob, { ...workerOptions, concurrency, - lockDuration: 3600000, // 1 hour - prevents job stalling during long ClickHouse operations - stalledInterval: 1800000, // 30 minutes - check for stalled jobs + lockDuration: 300000, // 5 minutes - for testing to see actual errors faster + stalledInterval: 150000, // 2.5 minutes - check for stalled jobs }); workers.push(importWorker); logger.info('Started worker for import', { concurrency, - lockDuration: 3600000, - stalledInterval: 1800000, + lockDuration: 300000, + stalledInterval: 150000, }); } From 60c7f5d16277bc2d4ae17a7f9f334abca6c70e74 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Thu, 8 Jan 2026 18:42:09 +0530 Subject: [PATCH 029/221] expose few materialised columns for filters 
(#24) --- packages/trpc/src/routers/chart.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/trpc/src/routers/chart.ts b/packages/trpc/src/routers/chart.ts index 1e3594380..3e7c35d34 100644 --- a/packages/trpc/src/routers/chart.ts +++ b/packages/trpc/src/routers/chart.ts @@ -241,6 +241,11 @@ export const chartRouter = createTRPCRouter({ 'device', 'brand', 'model', + 'source', + 'searchType', + 'sourceShowName', + 'showName', + 'isExplore', 'profile.id', 'profile.first_name', 'profile.last_name', From c5f805d529a3414a1fd2209894cd572813fb1398 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 13 Jan 2026 19:25:03 +0530 Subject: [PATCH 030/221] centralised lock for event updates (#25) --- apps/worker/src/jobs/sessions.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/worker/src/jobs/sessions.ts b/apps/worker/src/jobs/sessions.ts index 84b35cbb4..aca8b0c11 100644 --- a/apps/worker/src/jobs/sessions.ts +++ b/apps/worker/src/jobs/sessions.ts @@ -8,7 +8,7 @@ import { getOrganizationBillingEventsCount, getProjectEventsCount, } from '@openpanel/db'; -import { cacheable } from '@openpanel/redis'; +import { cacheable, getRedisCache } from '@openpanel/redis'; import { createSessionEnd } from './events.create-session-end'; export async function sessionsJob(job: Job) { @@ -24,6 +24,19 @@ export async function sessionsJob(job: Job) { const updateEventsCount = cacheable(async function updateEventsCount( projectId: string, ) { + // Acquire Redis lock to prevent duplicate executions across multiple workers + const lockKey = `lock:update-events:${projectId}`; + const redis = getRedisCache(); + const acquired = await redis.set(lockKey, '1', 'EX', 10, 'NX'); + + if (!acquired) { + // Another worker is already updating this project, skip + logger.info('Skipping updateEventsCount - lock held by another worker', { + projectId, + }); + return; + } + const organization = await db.organization.findFirst({ where: { projects: { From 
3d11fae5daa58c726b9337d47d8fa7a5fc70566c Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 13 Jan 2026 20:00:08 +0530 Subject: [PATCH 031/221] Revert "centralised lock for event updates (#25)" (#27) This reverts commit c5f805d529a3414a1fd2209894cd572813fb1398. --- apps/worker/src/jobs/sessions.ts | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/apps/worker/src/jobs/sessions.ts b/apps/worker/src/jobs/sessions.ts index aca8b0c11..84b35cbb4 100644 --- a/apps/worker/src/jobs/sessions.ts +++ b/apps/worker/src/jobs/sessions.ts @@ -8,7 +8,7 @@ import { getOrganizationBillingEventsCount, getProjectEventsCount, } from '@openpanel/db'; -import { cacheable, getRedisCache } from '@openpanel/redis'; +import { cacheable } from '@openpanel/redis'; import { createSessionEnd } from './events.create-session-end'; export async function sessionsJob(job: Job) { @@ -24,19 +24,6 @@ export async function sessionsJob(job: Job) { const updateEventsCount = cacheable(async function updateEventsCount( projectId: string, ) { - // Acquire Redis lock to prevent duplicate executions across multiple workers - const lockKey = `lock:update-events:${projectId}`; - const redis = getRedisCache(); - const acquired = await redis.set(lockKey, '1', 'EX', 10, 'NX'); - - if (!acquired) { - // Another worker is already updating this project, skip - logger.info('Skipping updateEventsCount - lock held by another worker', { - projectId, - }); - return; - } - const organization = await db.organization.findFirst({ where: { projects: { From a7b5fff5b991937f6d6a313d85b981371de3170a Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 13 Jan 2026 20:00:44 +0530 Subject: [PATCH 032/221] Enable logs (#26) * centralised lock for event updates * enable logs --- apps/worker/src/jobs/sessions.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/worker/src/jobs/sessions.ts b/apps/worker/src/jobs/sessions.ts index 84b35cbb4..c0e14dc17 100644 --- a/apps/worker/src/jobs/sessions.ts 
+++ b/apps/worker/src/jobs/sessions.ts @@ -38,6 +38,7 @@ const updateEventsCount = cacheable(async function updateEventsCount( }); if (!organization) { + logger.warn('updateEventsCount: Organization not found', { projectId }); return; } From 7e61662b764bfd05ba2c9ef901333fed23a5b959 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 14 Jan 2026 10:11:46 +0530 Subject: [PATCH 033/221] add redis lock to prevent query bombarding at cache expiry (#28) --- packages/db/src/services/project.service.ts | 33 +++++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/packages/db/src/services/project.service.ts b/packages/db/src/services/project.service.ts index 7e8a997a4..f2aa166e9 100644 --- a/packages/db/src/services/project.service.ts +++ b/packages/db/src/services/project.service.ts @@ -1,4 +1,4 @@ -import { cacheable } from '@openpanel/redis'; +import { cacheable, getRedisCache } from '@openpanel/redis'; import sqlstring from 'sqlstring'; import { TABLE_NAMES, chQuery } from '../clickhouse/client'; import type { Prisma, Project } from '../prisma-client'; @@ -102,9 +102,30 @@ export async function getProjects({ return projects; } -export const getProjectEventsCount = async (projectId: string) => { - const res = await chQuery<{ count: number }>( - `SELECT count(*) as count FROM ${TABLE_NAMES.events} WHERE project_id = ${sqlstring.escape(projectId)} AND name NOT IN ('session_start', 'session_end')`, - ); - return res[0]?.count; +const getProjectEventsCountUncached = async (projectId: string) => { + const lockKey = `lock:project-events-count:${projectId}`; + const redis = getRedisCache(); + + // Try to acquire lock to prevent thundering herd across workers + const acquired = await redis.set(lockKey, '1', 'EX', 30, 'NX'); + + if (!acquired) { + // Another worker is executing this query, skip - they'll update the count + return null; + } + + try { + const res = await chQuery<{ count: number }>( + `SELECT count(*) as count FROM 
${TABLE_NAMES.events} WHERE project_id = ${sqlstring.escape(projectId)} AND name NOT IN ('session_start', 'session_end')`, + ); + return res[0]?.count; + } finally { + // Release lock + redis.del(lockKey).catch(() => {}); + } }; + +export const getProjectEventsCount = cacheable( + getProjectEventsCountUncached, + 60 * 60, // 1 hour cache +); From 3732a2c5bf770543a0aad82919dc2e96830e4f0e Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 20 Jan 2026 20:42:20 +0530 Subject: [PATCH 034/221] update conversion query to use window functions (#29) --- .../db/src/services/conversion.service.ts | 66 +++++++------------ 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/packages/db/src/services/conversion.service.ts b/packages/db/src/services/conversion.service.ts index 31c53734d..647a49d2b 100644 --- a/packages/db/src/services/conversion.service.ts +++ b/packages/db/src/services/conversion.service.ts @@ -51,62 +51,46 @@ export class ConversionService { getEventFiltersWhereClause(eventB.filters), ).join(' AND '); - const eventACte = clix(this.client, timezone) - .select([ - `DISTINCT ${group}`, - 'created_at AS a_time', - `${clix.toStartOf('created_at', interval)} AS event_day`, - ...breakdownColumns, - ]) - .from(TABLE_NAMES.events) - .where('project_id', '=', projectId) - .where('name', '=', eventA.name) - .rawWhere(whereA) - .where('created_at', 'BETWEEN', [ - clix.datetime(startDate, 'toDateTime'), - clix.datetime(endDate, 'toDateTime'), - ]); - - const eventBCte = clix(this.client, timezone) - .select([group, 'created_at AS b_time']) - .from(TABLE_NAMES.events) - .where('project_id', '=', projectId) - .where('name', '=', eventB.name) - .rawWhere(whereB) - .where('created_at', 'BETWEEN', [ - clix.datetime(startDate, 'toDateTime'), - clix.datetime(endDate, 'toDateTime'), - ]); + const funnelWindowSeconds = funnelWindow * 3600; + + // Build funnel conditions + const conditionA = whereA ? 
`(name = '${eventA.name}' AND ${whereA})` : `name = '${eventA.name}'`; + const conditionB = whereB ? `(name = '${eventB.name}' AND ${whereB})` : `name = '${eventB.name}'`; + // Use windowFunnel approach - single scan, no JOIN const query = clix(this.client, timezone) - .with('event_a', eventACte) - .with('event_b', eventBCte) .select<{ event_day: string; total_first: number; conversions: number; conversion_rate_percentage: number; - [key: string]: string | number; // For breakdown columns + [key: string]: string | number; }>([ 'event_day', ...breakdownGroupBy, - 'count(*) AS total_first', - 'sum(if(conversion_time IS NOT NULL, 1, 0)) AS conversions', - 'round(100.0 * sum(if(conversion_time IS NOT NULL, 1, 0)) / count(*), 2) AS conversion_rate_percentage', + `uniqExact(${group}) AS total_first`, + `countIf(steps >= 2) AS conversions`, + `round(100.0 * countIf(steps >= 2) / uniqExact(${group}), 2) AS conversion_rate_percentage`, ]) .from( clix.exp(` - (SELECT - a.${group}, - a.a_time, - a.event_day, - ${breakdownGroupBy.length ? `${breakdownGroupBy.join(', ')},` : ''} - nullIf(min(CASE WHEN b.b_time BETWEEN a.a_time AND a.a_time + INTERVAL ${funnelWindow} HOUR THEN b.b_time END), '1970-01-01 00:00:00.000') AS conversion_time - FROM event_a AS a - LEFT JOIN event_b AS b ON a.${group} = b.${group} - GROUP BY a.${group}, a.a_time, a.event_day${breakdownGroupBy.length ? `, ${breakdownGroupBy.join(', ')}` : ''}) + (SELECT + ${group}, + any(${clix.toStartOf('created_at', interval)}) as event_day, + ${breakdownGroupBy.length ? `${breakdownGroupBy.map(b => `any(${b}) as ${b}`).join(', ')},` : ''} + windowFunnel(${funnelWindowSeconds})( + toDateTime(created_at), + ${conditionA}, + ${conditionB} + ) as steps + FROM ${TABLE_NAMES.events} + WHERE project_id = '${projectId}' + AND name IN ('${eventA.name}', '${eventB.name}') + AND created_at BETWEEN toDateTime('${startDate}') AND toDateTime('${endDate}') + GROUP BY ${group}${breakdownGroupBy.length ? 
`, ${breakdownGroupBy.join(', ')}` : ''}) `), ) + .where('steps', '>', 0) .groupBy(['event_day', ...breakdownGroupBy]); for (const order of ['event_day', ...breakdownGroupBy]) { From d06d255b138fa656e2503a57e8c55537062a6a61 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Tue, 20 Jan 2026 21:33:24 +0530 Subject: [PATCH 035/221] correlated subquery optimisations in charts service (#30) --- packages/db/src/services/chart.service.ts | 66 +++++++++++++---------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 6d2936479..954f10089 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -295,42 +295,50 @@ export function getChartSql({ return sql; } - // Note: The profile CTE (if it exists) is available in subqueries, so we can reference it directly if (breakdowns.length > 0) { - // Match breakdown properties in subquery with outer query's grouped values - // Since outer query groups by label_X, we reference those in the correlation - const breakdownMatches = breakdowns + const breakdownSelects = breakdowns + .map((b, index) => { + const propertyKey = getSelectPropertyKey(b.name); + return `${propertyKey} as breakdown_${index + 1}`; + }) + .join(', '); + + const breakdownGroupBy = breakdowns + .map((b, index) => `breakdown_${index + 1}`) + .join(', '); + + const totalCountWhere = getWhereWithoutBar(); + + addCte( + 'breakdown_totals', + `SELECT + ${breakdownSelects}, + uniq(profile_id) as total_count + FROM ${TABLE_NAMES.events} + ${profilesJoinRef ? 
`${profilesJoinRef} ` : ''}${totalCountWhere} + GROUP BY ${breakdownGroupBy}`, + ); + + const joinConditions = breakdowns .map((b, index) => { const propertyKey = getSelectPropertyKey(b.name); - // Correlate: match the property expression with outer query's label_X value - // ClickHouse allows referencing outer query columns in correlated subqueries - return `${propertyKey} = label_${index + 1}`; + return `breakdown_totals.breakdown_${index + 1} = ${propertyKey}`; }) .join(' AND '); - // Build WHERE clause for subquery - replace table alias and keep profile CTE reference - const subqueryWhere = getWhereWithoutBar() - .replace(/\be\./g, 'e2.') - .replace(/\bprofile\./g, 'profile.'); - - sb.select.total_unique_count = `( - SELECT uniq(profile_id) - FROM ${TABLE_NAMES.events} e2 - ${profilesJoinRef ? `${profilesJoinRef} ` : ''}${subqueryWhere} - AND ${breakdownMatches} - ) as total_count`; + sb.joins.breakdown_totals = `LEFT JOIN breakdown_totals ON ${joinConditions}`; + sb.select.total_unique_count = `any(breakdown_totals.total_count) as total_count`; } else { - // No breakdowns: calculate unique count across all data - // Build WHERE clause for subquery - replace table alias and keep profile CTE reference - const subqueryWhere = getWhereWithoutBar() - .replace(/\be\./g, 'e2.') - .replace(/\bprofile\./g, 'profile.'); - - sb.select.total_unique_count = `( - SELECT uniq(profile_id) - FROM ${TABLE_NAMES.events} e2 - ${profilesJoinRef ? `${profilesJoinRef} ` : ''}${subqueryWhere} - ) as total_count`; + const totalCountWhere = getWhereWithoutBar(); + + addCte( + 'total_unique', + `SELECT uniq(profile_id) as total_count + FROM ${TABLE_NAMES.events} + ${profilesJoinRef ? 
`${profilesJoinRef} ` : ''}${totalCountWhere}`, + ); + + sb.select.total_unique_count = `(SELECT total_count FROM total_unique) as total_count`; } const sql = `${getWith()}${getSelect()} ${getFrom()} ${getJoins()} ${getWhere()} ${getGroupBy()} ${getOrderBy()} ${getFill()}`; From 2ac21b173fa6e6b1c173b2efeb19c325a06e5cb5 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 02:19:17 +0530 Subject: [PATCH 036/221] large chart optimisations, properties with materialised columns optimsations (#31) --- packages/db/src/services/chart.service.ts | 130 +++++++++++++++++++++- packages/trpc/src/routers/chart.ts | 5 - 2 files changed, 128 insertions(+), 7 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 954f10089..71a1b3416 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -50,6 +50,21 @@ export function getSelectPropertyKey(property: string) { ); if (!match) return property; + // Use materialized columns instead of Map access for better performance + // These columns have indexes and are 10-50x faster than properties['key'] + const materializedColumns: Record = { + 'properties.action': 'action', + 'properties.isExplore': 'isExplore', + 'properties.searchType': 'searchType', + 'properties.showName': 'showName', + 'properties.source': 'source', + 'properties.sourceShowName': 'sourceShowName', + }; + + if (materializedColumns[property]) { + return materializedColumns[property]!; + } + if (property.includes('*')) { return `arrayMap(x -> trim(x), mapValues(mapExtractKeyLike(${match}, ${sqlstring.escape( transformPropertyKey(property), @@ -59,6 +74,105 @@ export function getSelectPropertyKey(property: string) { return `${match}['${property.replace(new RegExp(`^${match}.`), '')}']`; } +function getChartSqlFromMaterializedView({ + event, + interval, + startDate, + endDate, + projectId, + timezone, +}: { + event: IGetChartDataInput['event']; + 
interval: IGetChartDataInput['interval']; + startDate: string; + endDate: string; + projectId: string; + timezone: string; +}): string { + const { sb, getSelect, getWhere, getGroupBy, getOrderBy, getFill } = + createSqlBuilder(); + + // Use materialized view table + sb.from = 'events_daily_stats'; + + // Base filters + sb.where.projectId = `project_id = ${sqlstring.escape(projectId)}`; + if (event.name !== '*') { + sb.where.eventName = `name = ${sqlstring.escape(event.name)}`; + } + sb.where.dateRange = `date >= toDate(${sqlstring.escape(startDate)}) AND date <= toDate(${sqlstring.escape(endDate)})`; + + // Label + if (event.name !== '*') { + sb.select.label_0 = `${sqlstring.escape(event.name)} as label_0`; + } else { + sb.select.label_0 = `'*' as label_0`; + } + + // Count based on segment + if (event.segment === 'user') { + sb.select.count = 'uniqMerge(unique_profiles_state) as count'; + } else if (event.segment === 'session') { + sb.select.count = 'uniqMerge(unique_sessions_state) as count'; + } else { + sb.select.count = 'sum(event_count) as count'; + } + + // Date aggregation based on interval + if (interval === 'day') { + sb.select.date = 'date'; + sb.groupBy.date = 'date'; + } else if (interval === 'week') { + sb.select.date = 'toStartOfWeek(date, 1) as date'; + sb.groupBy.date = 'toStartOfWeek(date, 1)'; + } else if (interval === 'month') { + sb.select.date = 'toStartOfMonth(date) as date'; + sb.groupBy.date = 'toStartOfMonth(date)'; + } + + sb.orderBy.date = 'date ASC'; + + // Build WITH FILL for date gaps + let fillClause = ''; + if (interval === 'day') { + fillClause = `WITH FILL FROM toDate(${sqlstring.escape(startDate)}) TO toDate(${sqlstring.escape(endDate)}) STEP toIntervalDay(1)`; + } else if (interval === 'week') { + fillClause = `WITH FILL FROM toStartOfWeek(toDate(${sqlstring.escape(startDate)}), 1) TO toStartOfWeek(toDate(${sqlstring.escape(endDate)}), 1) STEP toIntervalWeek(1)`; + } else if (interval === 'month') { + fillClause = `WITH FILL 
FROM toStartOfMonth(toDate(${sqlstring.escape(startDate)})) TO toStartOfMonth(toDate(${sqlstring.escape(endDate)})) STEP toIntervalMonth(1)`; + } + + const sql = `${getSelect()} FROM ${sb.from} ${getWhere()} ${getGroupBy()} ${getOrderBy()} ${fillClause}`; + + console.log('-- Using Materialized View --'); + console.log(sql.replaceAll(/[\n\r]/g, ' ')); + console.log('-- End --'); + + return sql; +} + +function canUseMaterializedView( + event: IGetChartDataInput['event'], + breakdowns: IGetChartDataInput['breakdowns'], + interval: IGetChartDataInput['interval'], +): boolean { + // Can use MV if: + // 1. Interval is day or larger (not hour/minute) + // 2. No breakdowns OR single breakdown with no filters + // 3. Segment is 'user' or 'session' or 'event' + // 4. No complex property filters + const validIntervals = ['day', 'week', 'month']; + const validSegments = ['user', 'session', 'event']; + + return ( + validIntervals.includes(interval) && + validSegments.includes(event.segment ?? 'event') && + breakdowns.length === 0 && + (!event.filters || event.filters.length === 0) && + event.segment !== 'one_event_per_user' + ); +} + export function getChartSql({ event, breakdowns, @@ -70,6 +184,18 @@ export function getChartSql({ timezone, chartType, }: IGetChartDataInput & { timezone: string }) { + // Check if we can use materialized view for fast queries + if (canUseMaterializedView(event, breakdowns, interval)) { + return getChartSqlFromMaterializedView({ + event, + interval, + startDate, + endDate, + projectId, + timezone, + }); + } + const { sb, join, @@ -247,11 +373,11 @@ export function getChartSql({ }); if (event.segment === 'user') { - sb.select.count = 'countDistinct(profile_id) as count'; + sb.select.count = 'uniq(profile_id) as count'; } if (event.segment === 'session') { - sb.select.count = 'countDistinct(session_id) as count'; + sb.select.count = 'uniq(session_id) as count'; } if (event.segment === 'user_average') { diff --git a/packages/trpc/src/routers/chart.ts 
b/packages/trpc/src/routers/chart.ts index 3e7c35d34..1e3594380 100644 --- a/packages/trpc/src/routers/chart.ts +++ b/packages/trpc/src/routers/chart.ts @@ -241,11 +241,6 @@ export const chartRouter = createTRPCRouter({ 'device', 'brand', 'model', - 'source', - 'searchType', - 'sourceShowName', - 'showName', - 'isExplore', 'profile.id', 'profile.first_name', 'profile.last_name', From 945315cd4ba8174a2506e22333e1807ac18b7ccb Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 02:46:07 +0530 Subject: [PATCH 037/221] explicit handling for week and month (#32) --- packages/db/src/services/chart.service.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 71a1b3416..25d02cf1b 100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -122,16 +122,17 @@ function getChartSqlFromMaterializedView({ if (interval === 'day') { sb.select.date = 'date'; sb.groupBy.date = 'date'; + sb.orderBy.date = 'date ASC'; } else if (interval === 'week') { sb.select.date = 'toStartOfWeek(date, 1) as date'; sb.groupBy.date = 'toStartOfWeek(date, 1)'; + sb.orderBy.date = 'toStartOfWeek(date, 1) ASC'; } else if (interval === 'month') { sb.select.date = 'toStartOfMonth(date) as date'; sb.groupBy.date = 'toStartOfMonth(date)'; + sb.orderBy.date = 'toStartOfMonth(date) ASC'; } - sb.orderBy.date = 'date ASC'; - // Build WITH FILL for date gaps let fillClause = ''; if (interval === 'day') { From 4c50caec72deb7618030d64962831d67f7318b15 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 03:18:50 +0530 Subject: [PATCH 038/221] fix week and monthly queries (#33) --- packages/db/src/services/chart.service.ts | 40 +++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/packages/db/src/services/chart.service.ts b/packages/db/src/services/chart.service.ts index 25d02cf1b..479c79152 
100644 --- a/packages/db/src/services/chart.service.ts +++ b/packages/db/src/services/chart.service.ts @@ -92,15 +92,15 @@ function getChartSqlFromMaterializedView({ const { sb, getSelect, getWhere, getGroupBy, getOrderBy, getFill } = createSqlBuilder(); - // Use materialized view table - sb.from = 'events_daily_stats'; + // Use materialized view table with alias to avoid column name conflicts + sb.from = 'events_daily_stats t'; - // Base filters - sb.where.projectId = `project_id = ${sqlstring.escape(projectId)}`; + // Base filters (use table alias) + sb.where.projectId = `t.project_id = ${sqlstring.escape(projectId)}`; if (event.name !== '*') { - sb.where.eventName = `name = ${sqlstring.escape(event.name)}`; + sb.where.eventName = `t.name = ${sqlstring.escape(event.name)}`; } - sb.where.dateRange = `date >= toDate(${sqlstring.escape(startDate)}) AND date <= toDate(${sqlstring.escape(endDate)})`; + sb.where.dateRange = `t.date >= toDate(${sqlstring.escape(startDate)}) AND t.date <= toDate(${sqlstring.escape(endDate)})`; // Label if (event.name !== '*') { @@ -109,28 +109,28 @@ function getChartSqlFromMaterializedView({ sb.select.label_0 = `'*' as label_0`; } - // Count based on segment + // Count based on segment (use table alias) if (event.segment === 'user') { - sb.select.count = 'uniqMerge(unique_profiles_state) as count'; + sb.select.count = 'uniqMerge(t.unique_profiles_state) as count'; } else if (event.segment === 'session') { - sb.select.count = 'uniqMerge(unique_sessions_state) as count'; + sb.select.count = 'uniqMerge(t.unique_sessions_state) as count'; } else { - sb.select.count = 'sum(event_count) as count'; + sb.select.count = 'sum(t.event_count) as count'; } - // Date aggregation based on interval + // Date aggregation based on interval (use table alias) if (interval === 'day') { - sb.select.date = 'date'; - sb.groupBy.date = 'date'; - sb.orderBy.date = 'date ASC'; + sb.select.date = 't.date'; + sb.groupBy.date = 't.date'; + sb.orderBy.date = 't.date 
ASC'; } else if (interval === 'week') { - sb.select.date = 'toStartOfWeek(date, 1) as date'; - sb.groupBy.date = 'toStartOfWeek(date, 1)'; - sb.orderBy.date = 'toStartOfWeek(date, 1) ASC'; + sb.select.date = 'toStartOfWeek(t.date, 1) as date'; + sb.groupBy.date = 'toStartOfWeek(t.date, 1)'; + sb.orderBy.date = 'toStartOfWeek(t.date, 1) ASC'; } else if (interval === 'month') { - sb.select.date = 'toStartOfMonth(date) as date'; - sb.groupBy.date = 'toStartOfMonth(date)'; - sb.orderBy.date = 'toStartOfMonth(date) ASC'; + sb.select.date = 'toStartOfMonth(t.date) as date'; + sb.groupBy.date = 'toStartOfMonth(t.date)'; + sb.orderBy.date = 'toStartOfMonth(t.date) ASC'; } // Build WITH FILL for date gaps From 3dd6260ae26649f96828b6b2e3b4e3f6add6e1c7 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 13:34:11 +0530 Subject: [PATCH 039/221] add mv for daily event stats (#34) --- .../code-migrations/9-events-daily-stats.ts | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 packages/db/code-migrations/9-events-daily-stats.ts diff --git a/packages/db/code-migrations/9-events-daily-stats.ts b/packages/db/code-migrations/9-events-daily-stats.ts new file mode 100644 index 000000000..c471f85cf --- /dev/null +++ b/packages/db/code-migrations/9-events-daily-stats.ts @@ -0,0 +1,165 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { + chMigrationClient, + createMaterializedView, + moveDataBetweenTables, + runClickhouseMigrationCommands, +} from '../src/clickhouse/migration'; +import { getIsCluster } from './helpers'; + +/** + * Migration 9: Create events_daily_stats to include ALL events + * + * Prerequisites: Drop existing events_daily_stats before running this migration + * + * This migration creates events_daily_stats MV that includes ALL events for complete analytics. + * Previous version excluded session_start, session_end, and screen_view events. + * + * Steps: + * 1. 
Create events_daily_stats without POPULATE (starts capturing new data) + * 2. Backfill ALL historical data from events table using day-by-day batch inserts + */ +export async function up() { + const isClustered = getIsCluster(); + const sqls: string[] = []; + + // Step 1: Create MV without POPULATE (so it starts capturing new data immediately) + const mvStatements = createMaterializedView({ + name: 'events_daily_stats', + tableName: 'events', + orderBy: ['project_id', 'name', 'date'], + partitionBy: 'toYYYYMMDD(date)', + query: `SELECT + project_id, + name, + toDate(created_at) as date, + uniqState(profile_id) as unique_profiles_state, + uniqState(session_id) as unique_sessions_state, + count() as event_count + FROM {events} + GROUP BY project_id, name, date`, + distributionHash: 'cityHash64(project_id, name, date)', + replicatedVersion: '1', + isClustered, + populate: false, // Don't use POPULATE to avoid timeout + }); + + sqls.push(...mvStatements); + + // Step 2: Backfill all historical data from events table + // First, check the actual date range in the events table + const checkDataQuery = await chMigrationClient.query({ + query: ` + SELECT + min(toDate(created_at)) as min_date, + max(toDate(created_at)) as max_date, + count() as total_events + FROM events + `, + format: 'JSONEachRow', + }); + + const dataRange = await checkDataQuery.json<{ + min_date: string; + max_date: string; + total_events: string; + }>(); + + if (dataRange[0]?.min_date && dataRange[0]?.max_date) { + const startDate = new Date(dataRange[0].min_date); + const endDate = new Date(dataRange[0].max_date); + endDate.setDate(endDate.getDate() + 1); // Make it exclusive (next day) + + const totalEvents = Number(dataRange[0].total_events); + const daysDiff = Math.ceil((endDate.getTime() - startDate.getTime()) / (1000 * 60 * 60 * 24)); + + console.log('========================================'); + console.log('📊 Backfill Plan:'); + console.log(` Start Date: 
${startDate.toISOString().split('T')[0]}`); + console.log(` End Date: ${dataRange[0].max_date}`); + console.log(` Days: ${daysDiff} days`); + console.log(` Events: ${totalEvents.toLocaleString()} total events`); + console.log('========================================'); + console.log(''); + + // Use day-by-day batching to avoid timeouts + const backfillSqls = moveDataBetweenTables({ + from: 'events', + to: isClustered ? 'events_daily_stats_replicated' : 'events_daily_stats', + columns: [ + 'project_id', + 'name', + 'toDate(created_at) as date', + 'uniqState(profile_id) as unique_profiles_state', + 'uniqState(session_id) as unique_sessions_state', + 'count() as event_count', + ], + batch: { + startDate, + endDate, + column: 'toDate(created_at)', + interval: 'day', + transform: (date: Date) => { + const year = date.getFullYear(); + const month = String(date.getMonth() + 1).padStart(2, '0'); + const day = String(date.getDate()).padStart(2, '0'); + return `${year}-${month}-${day}`; + }, + }, + }); + + // Wrap each INSERT with GROUP BY since we're selecting aggregates + const groupedBackfillSqls = backfillSqls.map((sql) => { + // Extract the WHERE clause and modify the query structure + const whereMatch = sql.match(/WHERE (.+)$/); + if (!whereMatch) return sql; + + const whereClause = whereMatch[1]; + const [insertPart] = sql.split('SELECT'); + + return `${insertPart} + SELECT + project_id, + name, + toDate(created_at) as date, + uniqState(profile_id) as unique_profiles_state, + uniqState(session_id) as unique_sessions_state, + count() as event_count + FROM events + WHERE ${whereClause} + GROUP BY project_id, name, date`; + }); + + sqls.push(...groupedBackfillSqls); + } else { + console.log('No data found in the specified date range, skipping backfill'); + } + + // Write SQL to file for review + const sqlFilePath = path.join(__filename.replace('.ts', '.sql')); + fs.writeFileSync( + sqlFilePath, + sqls + .map((sql) => + sql + .trim() + .replace(/;$/, '') + 
.replace(/\n{2,}/g, '\n') + .concat(';'), + ) + .join('\n\n---\n\n'), + ); + + console.log(`Generated ${sqls.length} SQL statements`); + console.log(`SQL written to: ${sqlFilePath}`); + + // Execute if not in dry-run mode + if (!process.argv.includes('--dry')) { + await runClickhouseMigrationCommands(sqls); + console.log('✅ Migration completed successfully!'); + console.log('Next steps: Verify data in events_daily_stats'); + } else { + console.log('Dry-run mode: SQL generated but not executed'); + } +} From dac8cb1deb468e39bdcf87c0d245f24b0eec4294 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 14:41:14 +0530 Subject: [PATCH 040/221] fix migration script (#35) * fix migration script * fix migration script --- .../code-migrations/9-events-daily-stats.ts | 61 ++++++------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/packages/db/code-migrations/9-events-daily-stats.ts b/packages/db/code-migrations/9-events-daily-stats.ts index c471f85cf..0312bfcd3 100644 --- a/packages/db/code-migrations/9-events-daily-stats.ts +++ b/packages/db/code-migrations/9-events-daily-stats.ts @@ -69,56 +69,30 @@ export async function up() { if (dataRange[0]?.min_date && dataRange[0]?.max_date) { const startDate = new Date(dataRange[0].min_date); const endDate = new Date(dataRange[0].max_date); - endDate.setDate(endDate.getDate() + 1); // Make it exclusive (next day) const totalEvents = Number(dataRange[0].total_events); - const daysDiff = Math.ceil((endDate.getTime() - startDate.getTime()) / (1000 * 60 * 60 * 24)); + const daysDiff = Math.ceil((endDate.getTime() - startDate.getTime()) / (1000 * 60 * 60 * 24)) + 1; console.log('========================================'); console.log('📊 Backfill Plan:'); console.log(` Start Date: ${startDate.toISOString().split('T')[0]}`); - console.log(` End Date: ${dataRange[0].max_date}`); + console.log(` End Date: ${endDate.toISOString().split('T')[0]} (inclusive)`); console.log(` Days: ${daysDiff} days`); 
console.log(` Events: ${totalEvents.toLocaleString()} total events`); console.log('========================================'); console.log(''); - // Use day-by-day batching to avoid timeouts - const backfillSqls = moveDataBetweenTables({ - from: 'events', - to: isClustered ? 'events_daily_stats_replicated' : 'events_daily_stats', - columns: [ - 'project_id', - 'name', - 'toDate(created_at) as date', - 'uniqState(profile_id) as unique_profiles_state', - 'uniqState(session_id) as unique_sessions_state', - 'count() as event_count', - ], - batch: { - startDate, - endDate, - column: 'toDate(created_at)', - interval: 'day', - transform: (date: Date) => { - const year = date.getFullYear(); - const month = String(date.getMonth() + 1).padStart(2, '0'); - const day = String(date.getDate()).padStart(2, '0'); - return `${year}-${month}-${day}`; - }, - }, - }); - - // Wrap each INSERT with GROUP BY since we're selecting aggregates - const groupedBackfillSqls = backfillSqls.map((sql) => { - // Extract the WHERE clause and modify the query structure - const whereMatch = sql.match(/WHERE (.+)$/); - if (!whereMatch) return sql; - - const whereClause = whereMatch[1]; - const [insertPart] = sql.split('SELECT'); - - return `${insertPart} + // Generate day-by-day INSERT statements with proper GROUP BY + // Include today (endDate) in backfill to capture all historical data + const targetTable = isClustered ? 
'events_daily_stats_replicated' : 'events_daily_stats'; + const backfillSqls: string[] = []; + + let currentDate = new Date(endDate); // Start from endDate (today) + + while (currentDate >= startDate) { + const dateStr = currentDate.toISOString().split('T')[0]; + + const sql = `INSERT INTO ${targetTable} SELECT project_id, name, @@ -127,11 +101,14 @@ export async function up() { uniqState(session_id) as unique_sessions_state, count() as event_count FROM events - WHERE ${whereClause} + WHERE toDate(created_at) = '${dateStr}' GROUP BY project_id, name, date`; - }); - sqls.push(...groupedBackfillSqls); + backfillSqls.push(sql); + currentDate.setDate(currentDate.getDate() - 1); + } + + sqls.push(...backfillSqls); } else { console.log('No data found in the specified date range, skipping backfill'); } From 2b4f7378960b768fdf06e1ce9672924ceaf3d046 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 16:36:46 +0530 Subject: [PATCH 041/221] add materialise columns job (#36) --- apps/worker/src/jobs/cron.ts | 7 +++++++ packages/db/index.ts | 1 + packages/db/package.json | 1 + packages/db/prisma/schema.prisma | 16 ++++++++++++++++ 4 files changed, 25 insertions(+) diff --git a/apps/worker/src/jobs/cron.ts b/apps/worker/src/jobs/cron.ts index b50e3beb5..e7f523216 100644 --- a/apps/worker/src/jobs/cron.ts +++ b/apps/worker/src/jobs/cron.ts @@ -4,6 +4,7 @@ import { eventBuffer, profileBuffer, sessionBuffer } from '@openpanel/db'; import type { CronQueuePayload } from '@openpanel/queue'; import { jobdeleteProjects } from './cron.delete-projects'; +import { materializeColumns } from './cron.materialize-columns'; import { ping } from './cron.ping'; import { salt } from './cron.salt'; @@ -27,5 +28,11 @@ export async function cronJob(job: Job) { case 'deleteProjects': { return await jobdeleteProjects(job); } + case 'materializeColumns': { + return await materializeColumns({ + dryRun: job.data.dryRun ?? false, + threshold: job.data.threshold ?? 
150, + }); + } } } diff --git a/packages/db/index.ts b/packages/db/index.ts index 58042d3f3..417d5700d 100644 --- a/packages/db/index.ts +++ b/packages/db/index.ts @@ -29,3 +29,4 @@ export * from './src/clickhouse/query-builder'; export * from './src/services/import.service'; export * from './src/services/overview.service'; export * from './src/session-context'; +export * from './src/services/materialize-columns.service'; diff --git a/packages/db/package.json b/packages/db/package.json index fb4cd393e..86f9f89e2 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -9,6 +9,7 @@ "migrate:deploy:code": "pnpm with-env jiti ./code-migrations/migrate.ts", "migrate:deploy:db": "pnpm with-env prisma migrate deploy", "migrate:deploy": "pnpm migrate:deploy:db && pnpm migrate:deploy:code", + "materialize:analyze": "pnpm with-env jiti ./src/cli/materialize.ts", "typecheck": "tsc --noEmit", "with-env": "dotenv -e ../../.env -c --" }, diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index 38a29105d..f44337b5c 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -497,3 +497,19 @@ model Import { @@map("imports") } + +model MaterializedColumn { + id String @id @default(dbgenerated("gen_random_uuid()")) @db.Uuid + propertyKey String @unique // e.g., "utm_source" (without "properties." prefix) + columnName String // e.g., "utm_source" (usually same as propertyKey) + cardinality Int // Number of unique values + usageCount Int // How many reports use this property + benefitScore Float // Calculated benefit score + estimatedSize BigInt // Estimated storage cost in bytes + status String // "active", "pending", "failed" + createdAt DateTime @default(now()) + materializedAt DateTime? 
// When ALTER TABLE was executed + lastAnalyzedAt DateTime @default(now()) @updatedAt + + @@map("materialized_columns") +} From 8c201e231f6d5349c7cb29b8e15f83c6ae77f04c Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 16:41:33 +0530 Subject: [PATCH 042/221] add missing files (#37) --- .../src/jobs/cron.materialize-columns.ts | 36 ++ packages/db/src/cli/materialize.ts | 45 ++ .../services/materialize-columns.service.ts | 485 ++++++++++++++++++ 3 files changed, 566 insertions(+) create mode 100644 apps/worker/src/jobs/cron.materialize-columns.ts create mode 100644 packages/db/src/cli/materialize.ts create mode 100644 packages/db/src/services/materialize-columns.service.ts diff --git a/apps/worker/src/jobs/cron.materialize-columns.ts b/apps/worker/src/jobs/cron.materialize-columns.ts new file mode 100644 index 000000000..4e83fa3e6 --- /dev/null +++ b/apps/worker/src/jobs/cron.materialize-columns.ts @@ -0,0 +1,36 @@ +import { materializeColumnsService } from '@openpanel/db'; +import { logger } from '@/utils/logger'; + +export async function materializeColumns(options: { + dryRun?: boolean; + threshold?: number; +}) { + const { dryRun = false, threshold = 150 } = options; + + logger.info('Starting materialized columns cron job', { dryRun, threshold }); + + try { + const result = await materializeColumnsService.analyze({ + dryRun, + threshold, + }); + + logger.info('Materialized columns analysis complete', { + candidatesFound: result.candidates.length, + materialized: result.materialized, + dryRun, + }); + + // Log the report for visibility + console.log(result.report); + + return { + success: true, + candidates: result.candidates.length, + materialized: result.materialized.length, + }; + } catch (error) { + logger.error('Materialized columns cron job failed', { error }); + throw error; + } +} diff --git a/packages/db/src/cli/materialize.ts b/packages/db/src/cli/materialize.ts new file mode 100644 index 000000000..14ef08d42 --- /dev/null +++ 
b/packages/db/src/cli/materialize.ts @@ -0,0 +1,45 @@ +#!/usr/bin/env node +import { materializeColumnsService } from '../services/materialize-columns.service'; + +async function main() { + const args = process.argv.slice(2); + + const dryRun = !args.includes('--execute'); + const thresholdArg = args.find((arg) => arg.startsWith('--threshold=')); + const threshold = thresholdArg + ? Number.parseInt(thresholdArg.split('=')[1]!) + : 150; + + if (Number.isNaN(threshold) || threshold < 0) { + console.error('Error: Invalid threshold value'); + process.exit(1); + } + + console.log('Materialized Column Analyzer'); + console.log(`Mode: ${dryRun ? 'DRY RUN' : 'EXECUTE'}`); + console.log(`Threshold: ${threshold}`); + console.log(''); + + try { + const result = await materializeColumnsService.analyze({ + dryRun, + threshold, + }); + + console.log(result.report); + + if (!dryRun && result.materialized.length > 0) { + console.log('\n✅ Successfully materialized:'); + for (const prop of result.materialized) { + console.log(` - ${prop}`); + } + } + + process.exit(0); + } catch (error) { + console.error('\n❌ Error during analysis:', error); + process.exit(1); + } +} + +main(); diff --git a/packages/db/src/services/materialize-columns.service.ts b/packages/db/src/services/materialize-columns.service.ts new file mode 100644 index 000000000..82c44880c --- /dev/null +++ b/packages/db/src/services/materialize-columns.service.ts @@ -0,0 +1,485 @@ +import { type ILogger, createLogger } from '@openpanel/logger'; +import { ch } from '../clickhouse/client'; +import { chMigrationClient } from '../clickhouse/migration'; +import { db } from '../index'; + +interface PropertyUsageStats { + property: string; // Full path: "properties.utm_source" + propertyKey: string; // Key only: "utm_source" + usageCount: number; // How many reports use it + queryFrequency: number; // Estimated queries per day + cardinality: number; // Number of unique values + estimatedSize: number; // Estimated storage cost in 
bytes + benefit: number; // Calculated benefit score +} + +interface MaterializedColumnCandidate { + propertyKey: string; + columnName: string; + reason: string; + stats: PropertyUsageStats; +} + +export class MaterializeColumnsService { + private logger: ILogger; + + // Thresholds for materialization decisions + private readonly MIN_USAGE_COUNT = 3; // Must be used in at least 3 reports + private readonly MAX_CARDINALITY = 1000; // Don't materialize if >1000 unique values + private readonly MIN_BENEFIT_SCORE = 150; // Minimum benefit score to justify materialization + private readonly MAX_DAILY_MATERIALIZATIONS = 3; // Rate limit: max 3 new columns per day + + constructor() { + this.logger = createLogger({ name: 'materialize-columns' }); + } + + /** + * Main entry point with dry-run support + */ + async analyze(options: { + dryRun: boolean; + threshold?: number; + }): Promise<{ + candidates: MaterializedColumnCandidate[]; + report: string; + materialized: string[]; + }> { + const { dryRun, threshold = this.MIN_BENEFIT_SCORE } = options; + + this.logger.info('Starting materialized column analysis', { + dryRun, + threshold, + }); + + // Step 1: Get candidates + const candidates = await this.analyzeDashboardProperties(threshold); + + // Step 2: Generate report + const report = this.generateReport(candidates, dryRun); + + // Step 3: Execute if not dry-run + const materialized: string[] = []; + if (!dryRun && candidates.length > 0) { + // Rate limiting + const limited = candidates.slice(0, this.MAX_DAILY_MATERIALIZATIONS); + if (limited.length < candidates.length) { + this.logger.warn( + `Rate limiting: Only materializing ${limited.length} of ${candidates.length} candidates`, + ); + } + + for (const candidate of limited) { + try { + await this.materializeColumn(candidate); + materialized.push(candidate.propertyKey); + } catch (error) { + this.logger.error( + `Failed to materialize ${candidate.propertyKey}`, + { error }, + ); + } + } + } + + return { + candidates, + 
report, + materialized, + }; + } + + /** + * Main analysis function: Find properties that should be materialized + */ + private async analyzeDashboardProperties( + threshold: number, + ): Promise { + // Step 1: Get all properties used in reports + const propertyUsage = await this.getPropertyUsageFromReports(); + + if (propertyUsage.length === 0) { + this.logger.info('No properties found in reports'); + return []; + } + + this.logger.info(`Found ${propertyUsage.length} unique properties in reports`); + + // Step 2: Filter out already materialized columns + const existingColumns = await db.materializedColumn.findMany({ + where: { status: 'active' }, + select: { propertyKey: true }, + }); + const existingKeys = new Set(existingColumns.map((c) => c.propertyKey)); + + const newProperties = propertyUsage.filter( + (p) => !existingKeys.has(p.propertyKey), + ); + + if (newProperties.length === 0) { + this.logger.info('No new properties to analyze (all already materialized)'); + return []; + } + + this.logger.info( + `${newProperties.length} properties not yet materialized`, + ); + + // Step 3: Enrich with ClickHouse statistics + const enrichedStats = await Promise.all( + newProperties.map((usage) => this.enrichWithClickHouseStats(usage)), + ); + + // Step 4: Calculate benefit scores and filter + const candidates = enrichedStats + .map((stats) => this.calculateBenefitScore(stats)) + .filter((stats) => this.shouldMaterialize(stats, threshold)) + .map((stats) => this.createCandidate(stats)) + .sort((a, b) => b.stats.benefit - a.stats.benefit); // Sort by benefit descending + + this.logger.info(`Identified ${candidates.length} candidates for materialization`); + + return candidates; + } + + /** + * Extract properties from reports table + */ + private async getPropertyUsageFromReports(): Promise< + Array<{ + property: string; + propertyKey: string; + usageCount: number; + queryFrequency: number; + }> + > { + const reports = await db.report.findMany({ + select: { + breakdowns: 
true, + events: true, + }, + }); + + const propertyMap = new Map(); + + for (const report of reports) { + const properties = this.extractPropertiesFromReport(report); + for (const prop of properties) { + propertyMap.set(prop, (propertyMap.get(prop) || 0) + 1); + } + } + + // Estimate: each report queried ~10 times per day + const ESTIMATED_QUERIES_PER_DAY = 10; + + return Array.from(propertyMap.entries()).map(([property, usageCount]) => { + const propertyKey = property.replace('properties.', ''); + return { + property, + propertyKey, + usageCount, + queryFrequency: usageCount * ESTIMATED_QUERIES_PER_DAY, + }; + }); + } + + /** + * Extract property names from report JSON fields + */ + private extractPropertiesFromReport(report: { + breakdowns: any; + events: any; + }): string[] { + const properties = new Set(); + + // Parse breakdowns + try { + const breakdowns = Array.isArray(report.breakdowns) + ? report.breakdowns + : []; + for (const breakdown of breakdowns) { + if ( + breakdown?.name && + typeof breakdown.name === 'string' && + breakdown.name.startsWith('properties.') + ) { + // Skip wildcards and complex expressions + if ( + !breakdown.name.includes('*') && + !breakdown.name.includes('(') && + !breakdown.name.includes('[') + ) { + properties.add(breakdown.name); + } + } + } + } catch (e) { + this.logger.warn('Failed to parse breakdowns', { error: e }); + } + + // Parse events (filters) + try { + const events = Array.isArray(report.events) ? 
report.events : []; + for (const event of events) { + if (event?.filters && Array.isArray(event.filters)) { + for (const filter of event.filters) { + if ( + filter?.name && + typeof filter.name === 'string' && + filter.name.startsWith('properties.') + ) { + if ( + !filter.name.includes('*') && + !filter.name.includes('(') && + !filter.name.includes('[') + ) { + properties.add(filter.name); + } + } + } + } + } + } catch (e) { + this.logger.warn('Failed to parse event filters', { error: e }); + } + + return Array.from(properties); + } + + /** + * Get cardinality and size from ClickHouse + */ + private async enrichWithClickHouseStats(usage: { + property: string; + propertyKey: string; + usageCount: number; + queryFrequency: number; + }): Promise { + try { + const result = await ch.query({ + query: ` + SELECT + uniq(properties['${usage.propertyKey}']) as cardinality, + avg(length(properties['${usage.propertyKey}'])) as avg_length, + count() as total_rows + FROM events + WHERE properties['${usage.propertyKey}'] != '' + `, + format: 'JSONEachRow', + }); + + const data = await result.json<{ + cardinality: string; + avg_length: string; + total_rows: string; + }>(); + + const cardinality = Number(data[0]?.cardinality || 0); + const avgLength = Number(data[0]?.avg_length || 10); + const totalRows = Number(data[0]?.total_rows || 0); + + // Estimate storage: avgLength × totalRows (existing rows) + const estimatedSize = Math.ceil(avgLength * totalRows); + + return { + property: usage.property, + propertyKey: usage.propertyKey, + usageCount: usage.usageCount, + queryFrequency: usage.queryFrequency, + cardinality, + estimatedSize, + benefit: 0, + }; + } catch (error) { + this.logger.warn(`Failed to get stats for ${usage.property}`, { error }); + return { + property: usage.property, + propertyKey: usage.propertyKey, + usageCount: usage.usageCount, + queryFrequency: usage.queryFrequency, + cardinality: 0, + estimatedSize: 0, + benefit: 0, + }; + } + } + + /** + * Calculate benefit 
score + */ + private calculateBenefitScore( + stats: PropertyUsageStats, + ): PropertyUsageStats { + const usageScore = stats.usageCount * 10; + const frequencyScore = Math.min(stats.queryFrequency, 1000); + const cardinalityPenalty = Math.max(0, stats.cardinality - 100) * 0.5; + const sizePenalty = stats.estimatedSize / 1_000_000; // Penalty in MB + + const benefit = + usageScore + frequencyScore - cardinalityPenalty - sizePenalty; + + return { + ...stats, + benefit: Math.max(0, benefit), + }; + } + + /** + * Check if property should be materialized + */ + private shouldMaterialize( + stats: PropertyUsageStats, + threshold: number, + ): boolean { + if (stats.usageCount < this.MIN_USAGE_COUNT) { + return false; + } + + if (stats.cardinality > this.MAX_CARDINALITY) { + return false; + } + + if (stats.benefit < threshold) { + return false; + } + + return true; + } + + /** + * Create candidate object + */ + private createCandidate( + stats: PropertyUsageStats, + ): MaterializedColumnCandidate { + const columnName = stats.propertyKey; + + let reason = `Used in ${stats.usageCount} reports (~${stats.queryFrequency} queries/day). `; + + if (stats.cardinality < 50) { + reason += 'Low cardinality (ideal). '; + } else if (stats.cardinality < 200) { + reason += 'Moderate cardinality. '; + } + + if (stats.estimatedSize < 100_000_000) { + reason += 'Small storage cost. 
'; + } + + reason += `Benefit: ${stats.benefit.toFixed(0)}.`; + + return { + propertyKey: stats.propertyKey, + columnName, + reason, + stats, + }; + } + + /** + * Execute materialization + */ + private async materializeColumn( + candidate: MaterializedColumnCandidate, + ): Promise { + this.logger.info(`Materializing column: ${candidate.columnName}`, { + reason: candidate.reason, + }); + + try { + // Execute ALTER TABLE + await chMigrationClient.command({ + query: ` + ALTER TABLE events + ADD COLUMN IF NOT EXISTS ${candidate.columnName} String + MATERIALIZED properties['${candidate.propertyKey}'] + `, + }); + + // Record in database + await db.materializedColumn.create({ + data: { + propertyKey: candidate.propertyKey, + columnName: candidate.columnName, + cardinality: candidate.stats.cardinality, + usageCount: candidate.stats.usageCount, + benefitScore: candidate.stats.benefit, + estimatedSize: BigInt(candidate.stats.estimatedSize), + status: 'active', + materializedAt: new Date(), + }, + }); + + this.logger.info(`Successfully materialized: ${candidate.columnName}`); + } catch (error) { + // Try to record failure + try { + await db.materializedColumn.create({ + data: { + propertyKey: candidate.propertyKey, + columnName: candidate.columnName, + cardinality: candidate.stats.cardinality, + usageCount: candidate.stats.usageCount, + benefitScore: candidate.stats.benefit, + estimatedSize: BigInt(candidate.stats.estimatedSize), + status: 'failed', + }, + }); + } catch (dbError) { + this.logger.error('Failed to record failure in database', { dbError }); + } + + throw error; + } + } + + /** + * Generate human-readable report + */ + private generateReport( + candidates: MaterializedColumnCandidate[], + dryRun: boolean, + ): string { + let report = '\n' + '='.repeat(80) + '\n'; + report += dryRun + ? 
'DRY RUN: Materialized Column Analysis\n' + : 'Materialized Column Analysis\n'; + report += '='.repeat(80) + '\n\n'; + + if (candidates.length === 0) { + report += 'No properties found that meet materialization criteria.\n'; + report += '\nPossible reasons:\n'; + report += ' - All eligible properties already materialized\n'; + report += ' - No properties used frequently enough (min 3 reports)\n'; + report += ' - Properties have too high cardinality (>1000 unique values)\n'; + report += ' - Benefit score below threshold\n'; + return report; + } + + report += `Found ${candidates.length} candidate(s) for materialization:\n\n`; + + for (let i = 0; i < candidates.length; i++) { + const candidate = candidates[i]!; + report += `${i + 1}. properties.${candidate.propertyKey}\n`; + report += ` Column: ${candidate.columnName}\n`; + report += ` Usage: ${candidate.stats.usageCount} reports, ~${candidate.stats.queryFrequency} queries/day\n`; + report += ` Cardinality: ${candidate.stats.cardinality} unique values\n`; + report += ` Storage: ~${(candidate.stats.estimatedSize / 1_000_000).toFixed(2)} MB\n`; + report += ` Benefit Score: ${candidate.stats.benefit.toFixed(2)}\n`; + report += ` ${candidate.reason}\n`; + report += '\n'; + } + + if (dryRun) { + report += '⚠️ DRY RUN MODE: No changes will be made.\n'; + report += 'Run with --execute flag to materialize these columns.\n'; + } else { + report += `✅ Materializing top ${Math.min(candidates.length, this.MAX_DAILY_MATERIALIZATIONS)} columns...\n`; + } + + report += '\n' + '='.repeat(80) + '\n'; + + return report; + } +} + +export const materializeColumnsService = new MaterializeColumnsService(); From 7d96887e4f154874f66471abe7669315de849927 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 16:46:35 +0530 Subject: [PATCH 043/221] fix import (#38) --- packages/db/src/services/materialize-columns.service.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/packages/db/src/services/materialize-columns.service.ts b/packages/db/src/services/materialize-columns.service.ts index 82c44880c..e7427fb85 100644 --- a/packages/db/src/services/materialize-columns.service.ts +++ b/packages/db/src/services/materialize-columns.service.ts @@ -1,7 +1,7 @@ import { type ILogger, createLogger } from '@openpanel/logger'; import { ch } from '../clickhouse/client'; import { chMigrationClient } from '../clickhouse/migration'; -import { db } from '../index'; +import { db } from '../../index'; interface PropertyUsageStats { property: string; // Full path: "properties.utm_source" From 0cf6d38331786ba90280a3cafe74e685340b0141 Mon Sep 17 00:00:00 2001 From: ayushjhanwar-png Date: Wed, 21 Jan 2026 16:52:35 +0530 Subject: [PATCH 044/221] remove properties here (#39) --- packages/db/src/services/import.service.ts | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/db/src/services/import.service.ts b/packages/db/src/services/import.service.ts index 672febf7d..1e50cebb4 100644 --- a/packages/db/src/services/import.service.ts +++ b/packages/db/src/services/import.service.ts @@ -456,7 +456,6 @@ export async function moveImportsToProduction( referrer_name, referrer_type, duration, - properties, created_at, country, city, @@ -472,7 +471,7 @@ export async function moveImportsToProduction( model, imported_at ) - SELECT + SELECT id, name, sdk_name, @@ -487,7 +486,6 @@ export async function moveImportsToProduction( referrer_name, referrer_type, duration, - properties, created_at, country, city, @@ -562,7 +560,6 @@ export async function backfillSessionsToProduction( os_version, sign, version, - properties, utm_medium, utm_source, utm_campaign, @@ -572,7 +569,7 @@ export async function backfillSessionsToProduction( referrer_name, referrer_type ) - SELECT + SELECT any(e.session_id) as id, any(e.project_id) as project_id, if(any(nullIf(e.profile_id, e.device_id)) IS NULL, any(e.profile_id), any(nullIf(e.profile_id, 
e.device_id))) as profile_id, @@ -606,7 +603,6 @@ export async function backfillSessionsToProduction( argMinIf(e.os_version, e.created_at, e.name = 'session_start') as os_version, 1 as sign, 1 as version, - argMinIf(e.properties, e.created_at, e.name = 'session_start') as properties, argMinIf(e.properties['__query.utm_medium'], e.created_at, e.name = 'session_start') as utm_medium, argMinIf(e.properties['__query.utm_source'], e.created_at, e.name = 'session_start') as utm_source, argMinIf(e.properties['__query.utm_campaign'], e.created_at, e.name = 'session_start') as utm_campaign, From 940d2f5ba72403909fa842a7f0a346b5f7b5afc1 Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 18:15:41 +0530 Subject: [PATCH 045/221] optimise property queries (#40) --- .../services/materialize-columns.service.ts | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/packages/db/src/services/materialize-columns.service.ts b/packages/db/src/services/materialize-columns.service.ts index e7427fb85..313db218f 100644 --- a/packages/db/src/services/materialize-columns.service.ts +++ b/packages/db/src/services/materialize-columns.service.ts @@ -246,7 +246,8 @@ export class MaterializeColumnsService { } /** - * Get cardinality and size from ClickHouse + * Get cardinality and size from ClickHouse using event_property_values_mv + * This is MUCH faster than scanning the entire events table */ private async enrichWithClickHouseStats(usage: { property: string; @@ -255,14 +256,16 @@ export class MaterializeColumnsService { queryFrequency: number; }): Promise { try { + // Use the materialized view instead of scanning events table const result = await ch.query({ query: ` SELECT - uniq(properties['${usage.propertyKey}']) as cardinality, - avg(length(properties['${usage.propertyKey}'])) as avg_length, - count() as total_rows - FROM events - WHERE properties['${usage.propertyKey}'] != '' + uniqExact(property_value) as cardinality, + 
avg(length(property_value)) as avg_length, + count() as total_occurrences + FROM event_property_values_mv + WHERE property_key = '${usage.propertyKey}' + AND property_value != '' `, format: 'JSONEachRow', }); @@ -270,15 +273,16 @@ export class MaterializeColumnsService { const data = await result.json<{ cardinality: string; avg_length: string; - total_rows: string; + total_occurrences: string; }>(); const cardinality = Number(data[0]?.cardinality || 0); const avgLength = Number(data[0]?.avg_length || 10); - const totalRows = Number(data[0]?.total_rows || 0); + const totalOccurrences = Number(data[0]?.total_occurrences || 0); - // Estimate storage: avgLength × totalRows (existing rows) - const estimatedSize = Math.ceil(avgLength * totalRows); + // Estimate storage: avgLength × totalOccurrences + // Note: totalOccurrences is how many times this property appears, not event count + const estimatedSize = Math.ceil(avgLength * totalOccurrences); return { property: usage.property, From 51222c5662cbc38366ffa5d98dd32239104a927e Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 18:41:26 +0530 Subject: [PATCH 046/221] add reasons for skipping materialisation (#41) --- .../services/materialize-columns.service.ts | 182 ++++++++++++------ 1 file changed, 125 insertions(+), 57 deletions(-) diff --git a/packages/db/src/services/materialize-columns.service.ts b/packages/db/src/services/materialize-columns.service.ts index 313db218f..af39d03fe 100644 --- a/packages/db/src/services/materialize-columns.service.ts +++ b/packages/db/src/services/materialize-columns.service.ts @@ -13,6 +13,10 @@ interface PropertyUsageStats { benefit: number; // Calculated benefit score } +interface PropertyAnalysis extends PropertyUsageStats { + skipReason?: string; // Why it wasn't materialized (if skipped) +} + interface MaterializedColumnCandidate { propertyKey: string; columnName: string; @@ -41,6 +45,7 @@ export class MaterializeColumnsService { threshold?: number; }): Promise<{ 
candidates: MaterializedColumnCandidate[]; + allProperties: PropertyAnalysis[]; report: string; materialized: string[]; }> { @@ -51,11 +56,11 @@ export class MaterializeColumnsService { threshold, }); - // Step 1: Get candidates - const candidates = await this.analyzeDashboardProperties(threshold); + // Step 1: Get candidates and all analyzed properties + const { candidates, allProperties } = await this.analyzeDashboardProperties(threshold); // Step 2: Generate report - const report = this.generateReport(candidates, dryRun); + const report = this.generateReport(candidates, allProperties, dryRun); // Step 3: Execute if not dry-run const materialized: string[] = []; @@ -83,6 +88,7 @@ export class MaterializeColumnsService { return { candidates, + allProperties, report, materialized, }; @@ -93,31 +99,52 @@ export class MaterializeColumnsService { */ private async analyzeDashboardProperties( threshold: number, - ): Promise { + ): Promise<{ + candidates: MaterializedColumnCandidate[]; + allProperties: PropertyAnalysis[]; + }> { // Step 1: Get all properties used in reports const propertyUsage = await this.getPropertyUsageFromReports(); if (propertyUsage.length === 0) { this.logger.info('No properties found in reports'); - return []; + return { candidates: [], allProperties: [] }; } this.logger.info(`Found ${propertyUsage.length} unique properties in reports`); - // Step 2: Filter out already materialized columns + // Step 2: Check already materialized columns const existingColumns = await db.materializedColumn.findMany({ where: { status: 'active' }, select: { propertyKey: true }, }); const existingKeys = new Set(existingColumns.map((c) => c.propertyKey)); + // Separate already materialized from new properties + const alreadyMaterialized = propertyUsage.filter((p) => + existingKeys.has(p.propertyKey), + ); const newProperties = propertyUsage.filter( (p) => !existingKeys.has(p.propertyKey), ); + // Track all properties with their analysis + const allProperties: 
PropertyAnalysis[] = []; + + // Add already materialized properties + allProperties.push( + ...alreadyMaterialized.map((p) => ({ + ...p, + cardinality: 0, + estimatedSize: 0, + benefit: 0, + skipReason: '✅ Already materialized', + })), + ); + if (newProperties.length === 0) { this.logger.info('No new properties to analyze (all already materialized)'); - return []; + return { candidates: [], allProperties }; } this.logger.info( @@ -129,16 +156,57 @@ export class MaterializeColumnsService { newProperties.map((usage) => this.enrichWithClickHouseStats(usage)), ); - // Step 4: Calculate benefit scores and filter - const candidates = enrichedStats - .map((stats) => this.calculateBenefitScore(stats)) - .filter((stats) => this.shouldMaterialize(stats, threshold)) + // Step 4: Calculate benefit scores + const statsWithBenefit = enrichedStats.map((stats) => + this.calculateBenefitScore(stats), + ); + + // Step 5: Determine skip reasons and separate candidates + const analyzed = statsWithBenefit.map((stats) => { + const skipReason = this.getSkipReason(stats, threshold); + return { + ...stats, + skipReason, + }; + }); + + allProperties.push(...analyzed); + + // Extract candidates (those without skip reason) + const candidates = analyzed + .filter((stat) => !stat.skipReason) .map((stats) => this.createCandidate(stats)) - .sort((a, b) => b.stats.benefit - a.stats.benefit); // Sort by benefit descending + .sort((a, b) => b.stats.benefit - a.stats.benefit); this.logger.info(`Identified ${candidates.length} candidates for materialization`); - return candidates; + return { + candidates, + allProperties: allProperties.sort((a, b) => b.benefit - a.benefit), // Sort by benefit + }; + } + + /** + * Determine why a property should be skipped + */ + private getSkipReason(stats: PropertyUsageStats, threshold: number): string | undefined { + if (stats.usageCount < this.MIN_USAGE_COUNT) { + return `❌ Low usage (${stats.usageCount} reports, need ${this.MIN_USAGE_COUNT})`; + } + + if 
(stats.cardinality === 0) { + return `❌ No data found in event_property_values_mv`; + } + + if (stats.cardinality > this.MAX_CARDINALITY) { + return `❌ Too high cardinality (${stats.cardinality} values > ${this.MAX_CARDINALITY} limit)`; + } + + if (stats.benefit < threshold) { + return `❌ Benefit too low (${stats.benefit.toFixed(0)} < ${threshold} threshold)`; + } + + return undefined; // Should be materialized } /** @@ -327,28 +395,6 @@ export class MaterializeColumnsService { }; } - /** - * Check if property should be materialized - */ - private shouldMaterialize( - stats: PropertyUsageStats, - threshold: number, - ): boolean { - if (stats.usageCount < this.MIN_USAGE_COUNT) { - return false; - } - - if (stats.cardinality > this.MAX_CARDINALITY) { - return false; - } - - if (stats.benefit < threshold) { - return false; - } - - return true; - } - /** * Create candidate object */ @@ -437,10 +483,11 @@ export class MaterializeColumnsService { } /** - * Generate human-readable report + * Generate human-readable report with ALL properties */ private generateReport( candidates: MaterializedColumnCandidate[], + allProperties: PropertyAnalysis[], dryRun: boolean, ): string { let report = '\n' + '='.repeat(80) + '\n'; @@ -449,35 +496,56 @@ export class MaterializeColumnsService { : 'Materialized Column Analysis\n'; report += '='.repeat(80) + '\n\n'; - if (candidates.length === 0) { - report += 'No properties found that meet materialization criteria.\n'; - report += '\nPossible reasons:\n'; - report += ' - All eligible properties already materialized\n'; - report += ' - No properties used frequently enough (min 3 reports)\n'; - report += ' - Properties have too high cardinality (>1000 unique values)\n'; - report += ' - Benefit score below threshold\n'; - return report; + report += `Total properties analyzed: ${allProperties.length}\n`; + report += `Candidates for materialization: ${candidates.length}\n\n`; + + // Section 1: Candidates (will be materialized) + if 
(candidates.length > 0) { + report += '━'.repeat(80) + '\n'; + report += '✅ RECOMMENDED FOR MATERIALIZATION\n'; + report += '━'.repeat(80) + '\n\n'; + + for (let i = 0; i < candidates.length; i++) { + const candidate = candidates[i]!; + report += `${i + 1}. properties.${candidate.propertyKey}\n`; + report += ` Usage: ${candidate.stats.usageCount} reports, ~${candidate.stats.queryFrequency} queries/day\n`; + report += ` Cardinality: ${candidate.stats.cardinality} unique values\n`; + report += ` Storage: ~${(candidate.stats.estimatedSize / 1_000_000).toFixed(2)} MB\n`; + report += ` Benefit Score: ${candidate.stats.benefit.toFixed(2)}\n`; + report += ` Reason: ${candidate.reason}\n\n`; + } } - report += `Found ${candidates.length} candidate(s) for materialization:\n\n`; - - for (let i = 0; i < candidates.length; i++) { - const candidate = candidates[i]!; - report += `${i + 1}. properties.${candidate.propertyKey}\n`; - report += ` Column: ${candidate.columnName}\n`; - report += ` Usage: ${candidate.stats.usageCount} reports, ~${candidate.stats.queryFrequency} queries/day\n`; - report += ` Cardinality: ${candidate.stats.cardinality} unique values\n`; - report += ` Storage: ~${(candidate.stats.estimatedSize / 1_000_000).toFixed(2)} MB\n`; - report += ` Benefit Score: ${candidate.stats.benefit.toFixed(2)}\n`; - report += ` ${candidate.reason}\n`; - report += '\n'; + // Section 2: All other properties with skip reasons + const skipped = allProperties.filter((p) => p.skipReason); + if (skipped.length > 0) { + report += '━'.repeat(80) + '\n'; + report += 'ALL PROPERTIES ANALYZED\n'; + report += '━'.repeat(80) + '\n\n'; + + for (const prop of skipped) { + report += `• properties.${prop.propertyKey}\n`; + report += ` ${prop.skipReason}\n`; + report += ` Usage: ${prop.usageCount} reports, ~${prop.queryFrequency} queries/day`; + if (prop.cardinality > 0) { + report += `, Cardinality: ${prop.cardinality}, Benefit: ${prop.benefit.toFixed(0)}`; + } + report += '\n\n'; + } } + // 
Summary + report += '━'.repeat(80) + '\n'; + report += 'SUMMARY\n'; + report += '━'.repeat(80) + '\n'; + if (dryRun) { report += '⚠️ DRY RUN MODE: No changes will be made.\n'; report += 'Run with --execute flag to materialize these columns.\n'; - } else { + } else if (candidates.length > 0) { report += `✅ Materializing top ${Math.min(candidates.length, this.MAX_DAILY_MATERIALIZATIONS)} columns...\n`; + } else { + report += 'No actions needed. All eligible properties are already materialized.\n'; } report += '\n' + '='.repeat(80) + '\n'; From 3a4437734594f3fabca3bca4a21f5899859d054e Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Wed, 21 Jan 2026 19:19:59 +0530 Subject: [PATCH 047/221] change thresholds for materialisation (#42) --- .../services/materialize-columns.service.ts | 60 ++++++++++++++++--- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/packages/db/src/services/materialize-columns.service.ts b/packages/db/src/services/materialize-columns.service.ts index af39d03fe..299418a04 100644 --- a/packages/db/src/services/materialize-columns.service.ts +++ b/packages/db/src/services/materialize-columns.service.ts @@ -28,9 +28,9 @@ export class MaterializeColumnsService { private logger: ILogger; // Thresholds for materialization decisions - private readonly MIN_USAGE_COUNT = 3; // Must be used in at least 3 reports + private readonly MIN_USAGE_COUNT = 1; // Must be used in at least 1 report private readonly MAX_CARDINALITY = 1000; // Don't materialize if >1000 unique values - private readonly MIN_BENEFIT_SCORE = 150; // Minimum benefit score to justify materialization + private readonly MIN_BENEFIT_SCORE = 20; // Minimum benefit score to justify materialization private readonly MAX_DAILY_MATERIALIZATIONS = 3; // Rate limit: max 3 new columns per day constructor() { @@ -113,32 +113,50 @@ export class MaterializeColumnsService { this.logger.info(`Found ${propertyUsage.length} unique properties in reports`); - // Step 2: Check already materialized 
columns + // Step 2: Check already materialized columns from database tracking const existingColumns = await db.materializedColumn.findMany({ where: { status: 'active' }, select: { propertyKey: true }, }); const existingKeys = new Set(existingColumns.map((c) => c.propertyKey)); - // Separate already materialized from new properties - const alreadyMaterialized = propertyUsage.filter((p) => + // Step 3: Check if columns already exist in ClickHouse events table + const clickhouseColumns = await this.getExistingClickHouseColumns(); + const clickhouseColumnNames = new Set(clickhouseColumns); + + // Separate properties into categories + const alreadyTracked = propertyUsage.filter((p) => existingKeys.has(p.propertyKey), ); + const alreadyExistsInClickHouse = propertyUsage.filter( + (p) => !existingKeys.has(p.propertyKey) && clickhouseColumnNames.has(p.propertyKey), + ); const newProperties = propertyUsage.filter( - (p) => !existingKeys.has(p.propertyKey), + (p) => !existingKeys.has(p.propertyKey) && !clickhouseColumnNames.has(p.propertyKey), ); // Track all properties with their analysis const allProperties: PropertyAnalysis[] = []; - // Add already materialized properties + // Add properties already tracked in database + allProperties.push( + ...alreadyTracked.map((p) => ({ + ...p, + cardinality: 0, + estimatedSize: 0, + benefit: 0, + skipReason: '✅ Already materialized (tracked)', + })), + ); + + // Add properties that already exist as columns in ClickHouse allProperties.push( - ...alreadyMaterialized.map((p) => ({ + ...alreadyExistsInClickHouse.map((p) => ({ ...p, cardinality: 0, estimatedSize: 0, benefit: 0, - skipReason: '✅ Already materialized', + skipReason: '✅ Column already exists in events table', })), ); @@ -186,6 +204,30 @@ export class MaterializeColumnsService { }; } + /** + * Get existing materialized column names from ClickHouse events table + */ + private async getExistingClickHouseColumns(): Promise { + try { + const result = await ch.query({ + query: ` 
+ SELECT name + FROM system.columns + WHERE database = 'default' + AND table = 'events' + AND default_kind = 'MATERIALIZED' + `, + format: 'JSONEachRow', + }); + + const data = await result.json<{ name: string }>(); + return data.map((row) => row.name); + } catch (error) { + this.logger.warn('Failed to get existing ClickHouse columns', { error }); + return []; + } + } + /** * Determine why a property should be skipped */ From 3272efd0fb69964b4da1779cc00e2dd12d4c6669 Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Fri, 23 Jan 2026 15:46:26 +0530 Subject: [PATCH 048/221] fix: add chunking to CSV import to reduce memory usage - Split large CSV batches into configurable chunks (default 10k rows) - Reduces peak memory from 8GB+ to ~1-2GB per chunk - Prevents OOM errors with 50k batch size - Configurable via IMPORT_CSV_CHUNK_SIZE env var - Falls back to direct insert for batches smaller than chunk size --- packages/db/src/clickhouse/client.ts | 37 +++++++++++++++++----------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/packages/db/src/clickhouse/client.ts b/packages/db/src/clickhouse/client.ts index 50cfbb177..a8c931a1d 100644 --- a/packages/db/src/clickhouse/client.ts +++ b/packages/db/src/clickhouse/client.ts @@ -246,24 +246,33 @@ export async function chQueryWithMeta>( export async function chInsertCSV(tableName: string, rows: string[]) { try { const now = performance.now(); - // Create a readable stream in binary mode for CSV (similar to EventBuffer) - const csvStream = Readable.from(rows.join('\n'), { - objectMode: false, - }); - - await ch.insert({ - table: tableName, - values: csvStream, - format: 'CSV', - clickhouse_settings: { - format_csv_allow_double_quotes: 1, - format_csv_allow_single_quotes: 0, - }, - }); + const chunkSize = Number.parseInt( + process.env.IMPORT_CSV_CHUNK_SIZE || '10000', + 10, + ); + + // Insert in chunks to reduce memory pressure + for (let i = 0; i < rows.length; i += chunkSize) { + const chunk = rows.slice(i, i + 
chunkSize); + const csvStream = Readable.from(chunk.join('\n'), { + objectMode: false, + }); + + await ch.insert({ + table: tableName, + values: csvStream, + format: 'CSV', + clickhouse_settings: { + format_csv_allow_double_quotes: 1, + format_csv_allow_single_quotes: 0, + }, + }); + } logger.info('CSV Insert successful', { elapsed: performance.now() - now, rows: rows.length, + chunks: Math.ceil(rows.length / chunkSize), }); } catch (error) { logger.error('CSV Insert failed:', error); From d2c45a7d71269ebdeb237282aeac239fea1ec40f Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Fri, 23 Jan 2026 17:20:49 +0530 Subject: [PATCH 049/221] #d2p-openpanel From 7d13c63396a54e37d26c74cf361d5f0f71c6c31b Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Fri, 23 Jan 2026 19:37:00 +0530 Subject: [PATCH 050/221] fix: resolve memory leak in import worker Fix cumulative memory growth from 7GB to 16GB per hour during imports. Changes: - Replace eventBatch.length = 0 with eventBatch = [] to release array buffer - Add explicit csvRows = null after insert to enable garbage collection - Add memory logging (heapUsedMB, heapTotalMB, externalMB) per batch Root cause: V8 keeps array buffer capacity allocated when using length = 0. With 298 socket retry errors, csvRows arrays pile up faster than GC runs. Expected result: Memory should stabilize at 4-5GB instead of growing linearly. 
--- apps/worker/src/jobs/import.ts | 22 ++++++++++++++++++---- packages/db/src/services/import.service.ts | 5 ++++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/apps/worker/src/jobs/import.ts b/apps/worker/src/jobs/import.ts index f03a053cc..b9986bd87 100644 --- a/apps/worker/src/jobs/import.ts +++ b/apps/worker/src/jobs/import.ts @@ -139,7 +139,7 @@ export async function importJob(job: Job) { // Phase 1: Fetch & Transform - Process events in batches if (shouldRunStep('loading')) { - const eventBatch: any = []; + let eventBatch: any = []; for await (const rawEvent of providerInstance.parseSource( resumeLoadingFrom, )) { @@ -158,7 +158,13 @@ export async function importJob(job: Job) { // Process batch when it reaches the batch size if (eventBatch.length >= BATCH_SIZE) { - jobLogger.info('Processing batch', { batchSize: eventBatch.length }); + const memUsage = process.memoryUsage(); + jobLogger.info('Processing batch', { + batchSize: eventBatch.length, + heapUsedMB: Math.round(memUsage.heapUsed / 1024 / 1024), + heapTotalMB: Math.round(memUsage.heapTotal / 1024 / 1024), + externalMB: Math.round(memUsage.external / 1024 / 1024), + }); const transformedEvents: IClickhouseEvent[] = eventBatch.map( ( @@ -170,7 +176,7 @@ export async function importJob(job: Job) { await insertImportBatch(transformedEvents, importId); processedEvents += eventBatch.length; - eventBatch.length = 0; + eventBatch = []; const createdAt = new Date(transformedEvents[0]?.created_at || '') .toISOString() @@ -190,6 +196,14 @@ export async function importJob(job: Job) { // Process remaining events in the last batch if (eventBatch.length > 0) { + const memUsage = process.memoryUsage(); + jobLogger.info('Processing final batch', { + batchSize: eventBatch.length, + heapUsedMB: Math.round(memUsage.heapUsed / 1024 / 1024), + heapTotalMB: Math.round(memUsage.heapTotal / 1024 / 1024), + externalMB: Math.round(memUsage.external / 1024 / 1024), + }); + const transformedEvents = 
eventBatch.map( ( // @ts-expect-error @@ -200,7 +214,7 @@ export async function importJob(job: Job) { await insertImportBatch(transformedEvents, importId); processedEvents += eventBatch.length; - eventBatch.length = 0; + eventBatch = []; const createdAt = new Date(transformedEvents[0]?.created_at || '') .toISOString() diff --git a/packages/db/src/services/import.service.ts b/packages/db/src/services/import.service.ts index 1e50cebb4..f8045ac5d 100644 --- a/packages/db/src/services/import.service.ts +++ b/packages/db/src/services/import.service.ts @@ -38,7 +38,7 @@ export async function insertImportBatch( // Important to have same order as events_imports table // CSV format: properly quotes fields that need it - const csvRows = events.map((event) => { + let csvRows = events.map((event) => { // Properties need to be converted to JSON for Map(String, String) // All fields must be CSV-escaped when joining with commas const fields = [ @@ -80,6 +80,9 @@ export async function insertImportBatch( await chInsertCSV(TABLE_NAMES.events_imports, csvRows); + // Explicitly release memory + csvRows = null as any; + return { importId, totalEvents: events.length, From a7d2a5d6d82cfcea1a86176fc94fcd20f3bd3f74 Mon Sep 17 00:00:00 2001 From: Ayush Jhanwar Date: Sat, 24 Jan 2026 10:57:25 +0530 Subject: [PATCH 051/221] fix: socket hangup retries memory leak issue --- apps/worker/src/jobs/import.ts | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/apps/worker/src/jobs/import.ts b/apps/worker/src/jobs/import.ts index b9986bd87..03b767e88 100644 --- a/apps/worker/src/jobs/import.ts +++ b/apps/worker/src/jobs/import.ts @@ -166,21 +166,22 @@ export async function importJob(job: Job) { externalMB: Math.round(memUsage.external / 1024 / 1024), }); - const transformedEvents: IClickhouseEvent[] = eventBatch.map( + let transformedEvents: IClickhouseEvent[] = eventBatch.map( ( // @ts-expect-error event, ) => providerInstance!.transformEvent(event), ); + const 
createdAt = new Date(transformedEvents[0]?.created_at || '') + .toISOString() + .split('T')[0]; + await insertImportBatch(transformedEvents, importId); processedEvents += eventBatch.length; eventBatch = []; - - const createdAt = new Date(transformedEvents[0]?.created_at || '') - .toISOString() - .split('T')[0]; + transformedEvents = null as any; await updateImportStatus(jobLogger, job, importId, { step: 'loading', @@ -204,21 +205,22 @@ export async function importJob(job: Job) { externalMB: Math.round(memUsage.external / 1024 / 1024), }); - const transformedEvents = eventBatch.map( + let transformedEvents = eventBatch.map( ( // @ts-expect-error event, ) => providerInstance!.transformEvent(event), ); + const createdAt = new Date(transformedEvents[0]?.created_at || '') + .toISOString() + .split('T')[0]; + await insertImportBatch(transformedEvents, importId); processedEvents += eventBatch.length; eventBatch = []; - - const createdAt = new Date(transformedEvents[0]?.created_at || '') - .toISOString() - .split('T')[0]; + transformedEvents = null as any; await updateImportStatus(jobLogger, job, importId, { step: 'loading', From e8d8eb08292c77a847d7a2c42767c361764e264c Mon Sep 17 00:00:00 2001 From: HARSHIT GUPTA Date: Sat, 24 Jan 2026 11:17:46 +0530 Subject: [PATCH 052/221] add custom cohort capability (#43) * add custom cohort capability * add sample ci * try fixing issues --- .github/workflows/test-build.yml | 89 +++ .../cohort/cohort-criteria-builder.tsx | 553 ++++++++++++++++++ .../components/forms/textarea-with-label.tsx | 24 + .../report/sidebar/PropertiesCombobox.tsx | 77 ++- .../report/sidebar/ReportEvents.tsx | 9 +- .../report/sidebar/filters/FilterItem.tsx | 31 +- apps/start/src/modals/add-cohort.tsx | 137 +++++ apps/start/src/modals/edit-cohort.tsx | 134 +++++ apps/start/src/modals/index.tsx | 4 + ...app.$organizationId.$projectId.cohorts.tsx | 188 ++++++ apps/start/src/utils/title.ts | 1 + packages/constants/index.ts | 2 + 
packages/db/code-migrations/4-cohorts.ts | 128 ++++ packages/db/index.ts | 1 + packages/db/prisma/schema.prisma | 25 + packages/db/src/clickhouse/client.ts | 3 + packages/db/src/services/chart.service.ts | 57 +- .../db/src/services/cohort.service.test.ts | 420 +++++++++++++ packages/db/src/services/cohort.service.ts | 485 +++++++++++++++ packages/db/vitest.config.ts | 3 + packages/trpc/src/root.ts | 2 + packages/trpc/src/routers/cohort.ts | 369 ++++++++++++ .../validation/src/cohort.validation.test.ts | 371 ++++++++++++ packages/validation/src/cohort.validation.ts | 135 +++++ packages/validation/src/index.ts | 17 + 25 files changed, 3257 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/test-build.yml create mode 100644 apps/start/src/components/cohort/cohort-criteria-builder.tsx create mode 100644 apps/start/src/components/forms/textarea-with-label.tsx create mode 100644 apps/start/src/modals/add-cohort.tsx create mode 100644 apps/start/src/modals/edit-cohort.tsx create mode 100644 apps/start/src/routes/_app.$organizationId.$projectId.cohorts.tsx create mode 100644 packages/db/code-migrations/4-cohorts.ts create mode 100644 packages/db/src/services/cohort.service.test.ts create mode 100644 packages/db/src/services/cohort.service.ts create mode 100644 packages/db/vitest.config.ts create mode 100644 packages/trpc/src/routers/cohort.ts create mode 100644 packages/validation/src/cohort.validation.test.ts create mode 100644 packages/validation/src/cohort.validation.ts diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml new file mode 100644 index 000000000..3c8a6250e --- /dev/null +++ b/.github/workflows/test-build.yml @@ -0,0 +1,89 @@ +name: Test Build + +on: + push: + branches: + - '**' + - '!main' + paths: + - "apps/api/**" + - "apps/worker/**" + - "apps/start/**" + - "packages/**" + - "!packages/sdks/**" + - "**Dockerfile" + - ".github/workflows/**" + +jobs: + changes: + runs-on: ubuntu-latest + outputs: + api: ${{ 
steps.filter.outputs.api }} + worker: ${{ steps.filter.outputs.worker }} + public: ${{ steps.filter.outputs.public }} + dashboard: ${{ steps.filter.outputs.dashboard }} + steps: + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v2 + id: filter + with: + base: ${{ github.ref }} + filters: | + api: + - 'apps/api/**' + - 'packages/**' + - '.github/workflows/**' + worker: + - 'apps/worker/**' + - 'packages/**' + - '.github/workflows/**' + public: + - 'apps/public/**' + - 'packages/**' + - '.github/workflows/**' + dashboard: + - 'apps/start/**' + - 'packages/**' + - '.github/workflows/**' + + test-build-api: + needs: changes + if: ${{ needs.changes.outputs.api == 'true' }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Test build + run: docker buildx build --file apps/api/Dockerfile --build-arg DATABASE_URL=postgresql://dummy:dummy@localhost:5432/dummy . + + test-build-worker: + needs: changes + if: ${{ needs.changes.outputs.worker == 'true' }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Test build + run: docker buildx build --file apps/worker/Dockerfile --build-arg DATABASE_URL=postgresql://dummy:dummy@localhost:5432/dummy . + + test-build-dashboard: + needs: changes + if: ${{ needs.changes.outputs.dashboard == 'true' }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Test build + run: docker buildx build --file apps/start/Dockerfile --build-arg NO_CLOUDFLARE=1 . 
diff --git a/apps/start/src/components/cohort/cohort-criteria-builder.tsx b/apps/start/src/components/cohort/cohort-criteria-builder.tsx new file mode 100644 index 000000000..97daf44ed --- /dev/null +++ b/apps/start/src/components/cohort/cohort-criteria-builder.tsx @@ -0,0 +1,553 @@ +import { Button } from '@/components/ui/button'; +import { ComboboxAdvanced } from '@/components/ui/combobox-advanced'; +import { DropdownMenuComposed } from '@/components/ui/dropdown-menu'; +import { InputWithLabel } from '@/components/forms/input-with-label'; +import { useAppParams } from '@/hooks/use-app-params'; +import { useEventNames } from '@/hooks/use-event-names'; +import { operators } from '@openpanel/constants'; +import type { + CohortDefinition, + EventBasedCohortDefinition, + PropertyBasedCohortDefinition, + EventCriteria, + IChartEventFilter, +} from '@openpanel/validation'; +import { mapKeys } from '@openpanel/validation'; +import { PlusIcon, TrashIcon } from 'lucide-react'; +import { useState } from 'react'; +import { ColorSquare } from '@/components/color-square'; +import { SlidersHorizontal } from 'lucide-react'; +import { PureFilterItem } from '@/components/report/sidebar/filters/FilterItem'; + +interface CohortCriteriaBuilderProps { + definition: CohortDefinition; + onChange: (definition: CohortDefinition) => void; +} + +export function CohortCriteriaBuilder({ + definition, + onChange, +}: CohortCriteriaBuilderProps) { + const { projectId } = useAppParams(); + const eventNames = useEventNames({ projectId }); + + const handleTypeChange = (type: 'event' | 'property') => { + if (type === 'event') { + onChange({ + type: 'event', + criteria: { + events: [], + operator: 'or', + }, + }); + } else { + onChange({ + type: 'property', + criteria: { + properties: [], + operator: 'or', + }, + }); + } + }; + + return ( +
+
+ + +
+ + {definition.type === 'event' && ( + + )} + + {definition.type === 'property' && ( + + )} +
+ ); +} + +interface EventBasedBuilderProps { + definition: EventBasedCohortDefinition; + onChange: (definition: EventBasedCohortDefinition) => void; + eventNames: string[]; +} + +function EventBasedBuilder({ + definition, + onChange, + eventNames: eventNamesArray, +}: EventBasedBuilderProps) { + // Transform array of strings to format expected by ComboboxAdvanced + const eventNames = eventNamesArray.map((name) => ({ + value: name, + label: name, + count: 0, + })); + const addEventCriteria = () => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + events: [ + ...definition.criteria.events, + { + name: '', + filters: [], + timeframe: { type: 'relative', value: '30d' }, + frequency: { operator: 'gte', value: 1 }, + }, + ], + }, + }); + }; + + const removeEventCriteria = (index: number) => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + events: definition.criteria.events.filter((_, i) => i !== index), + }, + }); + }; + + const updateEventCriteria = (index: number, criteria: EventCriteria) => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + events: definition.criteria.events.map((e, i) => + i === index ? criteria : e, + ), + }, + }); + }; + + const updateOperator = (operator: 'or' | 'and') => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + operator, + }, + }); + }; + + return ( +
+
+ Match + + + +
+ + {definition.criteria.events.map((eventCriteria, index) => ( + updateEventCriteria(index, criteria)} + onRemove={() => removeEventCriteria(index)} + eventNames={eventNames} + /> + ))} + + +
+ ); +} +/** Props for EventCriteriaItem: the criteria being edited, callbacks for change and removal, and the selectable event names (value, label, count). */ +interface EventCriteriaItemProps { + criteria: EventCriteria; + onChange: (criteria: EventCriteria) => void; + onRemove: () => void; + eventNames: Array<{ value: string; label: string; count: number }>; +} +/** Editor for a single EventCriteria entry. Every edit is reported through onChange as a new criteria object built with spread syntax; the incoming criteria object is never mutated here. NOTE(review): the JSX element tags of the returned markup are missing from this extract (only attribute and text fragments remain), so the rendered structure cannot be described from here. */ +function EventCriteriaItem({ + criteria, + onChange, + onRemove, + eventNames, +}: EventCriteriaItemProps) { +/* Appends a blank filter (empty name, operator 'is', empty value list) with a random base-36 id. NOTE(review): ids derived from Math.random() are not guaranteed to be unique or non-empty. */ const addFilter = () => { + onChange({ + ...criteria, + filters: [ + ...criteria.filters, + { + id: Math.random().toString(36).substring(7), + name: '', + operator: 'is', + value: [], + }, + ], + }); + }; +/* Removes the filter whose id equals the id of the given filter. */ + const removeFilter = (filter: IChartEventFilter) => { + onChange({ + ...criteria, + filters: criteria.filters.filter((f) => f.id !== filter.id), + }); + }; +/* Replaces the value list of the filter with the matching id; other filters are passed through unchanged. NOTE(review): the element type of Array appears to have been lost in this extract; confirm against the original file. */ + const updateFilterValue = ( + value: Array, + filter: IChartEventFilter, + ) => { + onChange({ + ...criteria, + filters: criteria.filters.map((f) => + f.id === filter.id ? { ...f, value } : f, + ), + }); + }; +/* Sets the operator on the matching filter and keeps at most its first value (value.slice(0, 1)). */ + const updateFilterOperator = ( + operator: IChartEventFilter['operator'], + filter: IChartEventFilter, + ) => { + onChange({ + ...criteria, + filters: criteria.filters.map((f) => + f.id === filter.id ? { ...f, operator, value: f.value.slice(0, 1) } : f, + ), + }); + }; +/* Markup follows. The frequency handlers spread criteria.frequency with a non-null assertion; presumably frequency is always set when they run (TODO confirm). */ + return ( +
+
+
+ + + onChange({ ...criteria, name: values[0] || '' }) + } + placeholder="Select event..." + className="w-full" + /> +
+ +
+ + {/* Frequency */} +
+ +
+ + onChange({ + ...criteria, + frequency: { ...criteria.frequency!, operator }, + }) + } + items={[ + { value: 'gte', label: 'At least' }, + { value: 'eq', label: 'Exactly' }, + { value: 'lte', label: 'At most' }, + ]} + label="Operator" + > + + + + onChange({ + ...criteria, + frequency: { + ...criteria.frequency!, + value: parseInt(e.target.value) || 1, + }, + }) + } + className="w-20 rounded border px-2 py-1 text-sm" + /> + + times + +
+
+ + {/* Timeframe */} +
+ +
+ { + if (type === 'relative') { + onChange({ + ...criteria, + timeframe: { type: 'relative', value: '30d' }, + }); + } else { + onChange({ + ...criteria, + timeframe: { + type: 'absolute', + value: new Date().toISOString().split('T')[0], + }, + }); + } + }} + items={[ + { value: 'relative', label: 'Last' }, + { value: 'absolute', label: 'Since' }, + ]} + label="Type" + > + + + {criteria.timeframe.type === 'relative' ? ( + + onChange({ + ...criteria, + timeframe: { type: 'relative', value }, + }) + } + items={[ + { value: '7d', label: '7 days' }, + { value: '30d', label: '30 days' }, + { value: '90d', label: '90 days' }, + { value: '1y', label: '1 year' }, + ]} + label="Period" + > + + + ) : ( + + onChange({ + ...criteria, + timeframe: { type: 'absolute', value: e.target.value }, + }) + } + className="rounded border px-2 py-1 text-sm" + /> + )} +
+
+ + {/* Filters */} + {criteria.filters.length > 0 && ( +
+ +
+ {criteria.filters.map((filter) => ( + + ))} +
+
+ )} + + +
+ ); +} +/** Props for PropertyBasedBuilder: the property-based cohort definition being edited and the callback that receives each updated definition. */ +interface PropertyBasedBuilderProps { + definition: PropertyBasedCohortDefinition; + onChange: (definition: PropertyBasedCohortDefinition) => void; +} +/** Editor for a property-based cohort definition. Each handler passes onChange a new definition whose criteria object is rebuilt with spread syntax; the incoming definition is never mutated here. NOTE(review): the JSX element tags of the returned markup are missing from this extract, so the rendered structure cannot be described from here. */ +function PropertyBasedBuilder({ + definition, + onChange, +}: PropertyBasedBuilderProps) { +/* Appends a blank property filter (empty name, operator 'is', empty value list) with a random base-36 id. NOTE(review): ids derived from Math.random() are not guaranteed to be unique or non-empty. */ const addPropertyFilter = () => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + properties: [ + ...definition.criteria.properties, + { + id: Math.random().toString(36).substring(7), + name: '', + operator: 'is', + value: [], + }, + ], + }, + }); + }; +/* Removes the property filter whose id equals the id of the given filter. */ + const removePropertyFilter = (filter: IChartEventFilter) => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + properties: definition.criteria.properties.filter( + (f) => f.id !== filter.id, + ), + }, + }); + }; +/* Replaces the value list of the property filter with the matching id. NOTE(review): the element type of Array appears to have been lost in this extract; confirm against the original file. */ + const updatePropertyFilterValue = ( + value: Array, + filter: IChartEventFilter, + ) => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + properties: definition.criteria.properties.map((f) => + f.id === filter.id ? { ...f, value } : f, + ), + }, + }); + }; +/* Sets the operator on the matching property filter and keeps at most its first value (value.slice(0, 1)). */ + const updatePropertyFilterOperator = ( + operator: IChartEventFilter['operator'], + filter: IChartEventFilter, + ) => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + properties: definition.criteria.properties.map((f) => + f.id === filter.id + ? { ...f, operator, value: f.value.slice(0, 1) } + : f, + ), + }, + }); + }; +/* Sets the criteria-level operator ('or' or 'and') on the definition. */ + const updateOperator = (operator: 'or' | 'and') => { + onChange({ + ...definition, + criteria: { + ...definition.criteria, + operator, + }, + }); + }; + + return ( +
+
+ Match + + + +
+ + {definition.criteria.properties.length > 0 && ( +
+ {definition.criteria.properties.map((filter) => ( + + ))} +
+ )} + + +
+ ); +} diff --git a/apps/start/src/components/forms/textarea-with-label.tsx b/apps/start/src/components/forms/textarea-with-label.tsx new file mode 100644 index 000000000..666e41062 --- /dev/null +++ b/apps/start/src/components/forms/textarea-with-label.tsx @@ -0,0 +1,24 @@ +import { Label } from '@/components/ui/label'; +import { Textarea } from '@/components/ui/textarea'; +import { forwardRef } from 'react'; + +interface TextareaWithLabelProps + extends React.TextareaHTMLAttributes { + label: string; +} + +export const TextareaWithLabel = forwardRef< + HTMLTextAreaElement, + TextareaWithLabelProps +>(({ label, id, ...props }, ref) => { + const inputId = id || label.toLowerCase().replace(/\s+/g, '-'); + + return ( +
+ +