Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions apps/web/src/app/admin/api/backfills/email-domain/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ describe('emailDomainBackfillCandidates', () => {
expect(rows.map(r => r.id)).not.toContain(user.id);
});

it('excludes soft-deleted users so the GDPR email_domain=null invariant is preserved', async () => {
it('does not select newly soft-deleted users because their tombstone domain is stored', async () => {
const user = await insertTestUser({ email_domain: 'example.com' });

await softDeleteUser(user.id);
const softDeleted = await db
.select()
.from(kilocode_users)
.where(eq(kilocode_users.id, user.id));
expect(softDeleted[0].email_domain).toBeNull();
expect(softDeleted[0].email_domain).toBe('deleted.invalid');
expect(softDeleted[0].blocked_reason).toMatch(/^soft-deleted at /);

const rows = await db
Expand All @@ -52,6 +52,23 @@ describe('emailDomainBackfillCandidates', () => {
expect(rows.map(r => r.id)).not.toContain(user.id);
});

it('includes legacy soft-deleted users missing a tombstone domain', async () => {
  // Fixture shaped like a row anonymized by the older soft-delete format:
  // plus-addressed tombstone email, a `soft-deleted at` blocked_reason, and
  // no email_domain stored on the row.
  const legacyId = 'legacy-deleted-user';
  const legacyUser = await insertTestUser({
    id: legacyId,
    google_user_email: `deleted+${legacyId}@deleted.invalid`,
    email_domain: null,
    blocked_reason: 'soft-deleted at 2026-01-15T12:00:00.000Z',
  });

  const matches = await db
    .select({ id: kilocode_users.id })
    .from(kilocode_users)
    .where(emailDomainBackfillCandidates);
  const candidateIds = matches.map(match => match.id);

  // The row must be picked up so the backfill can fill in its domain.
  expect(candidateIds).toContain(legacyUser.id);
});

it('still includes users blocked for other reasons', async () => {
const user = await insertTestUser({
email_domain: null,
Expand Down
14 changes: 2 additions & 12 deletions apps/web/src/app/admin/api/backfills/email-domain/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,10 @@ import { NextResponse } from 'next/server';
import { getUserFromAuth } from '@/lib/user/server';
import { db } from '@/lib/drizzle';
import { kilocode_users } from '@kilocode/db';
import { and, isNull, count, not, or, sql, like } from 'drizzle-orm';
import { isNull, count, sql } from 'drizzle-orm';
import { extractEmailDomain } from '@/lib/email-domain';

// Exclude soft-deleted users: softDeleteUser anonymizes them to
// `deleted+<id>@deleted.invalid` and sets `blocked_reason` to a string starting
// with `soft-deleted at`. Filling email_domain for those rows would undo the
// GDPR nulling invariant.
export const emailDomainBackfillCandidates = and(
isNull(kilocode_users.email_domain),
or(
isNull(kilocode_users.blocked_reason),
not(like(kilocode_users.blocked_reason, 'soft-deleted at %'))
)
);
// Backfill candidates are the rows whose email_domain is still NULL.
// No blocked_reason filter is applied: softDeleteUser now stores the tombstone
// domain (`deleted.invalid`) on the row, so freshly soft-deleted users never
// match, while legacy soft-deleted rows that still have a NULL email_domain
// are selected like any other row missing the column.
export const emailDomainBackfillCandidates = isNull(kilocode_users.email_domain);

export type EmailDomainCountsResponse = {
missing: number;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/* eslint-disable drizzle/enforce-delete-with-where */
import { db } from '@/lib/drizzle';
import { kilocode_users } from '@kilocode/db';
import { eq } from 'drizzle-orm';
import { insertTestUser } from '@/tests/helpers/user.helper';
import { softDeleteUser } from '@/lib/user';
import { canonicalizeDeletedUserEmail } from '@/lib/user/deleted-email';
import { normalizedEmailBackfillCandidates } from './route';

describe('normalizedEmailBackfillCandidates', () => {
  /** Applies the candidate predicate to the users table and returns the ids it matches. */
  const selectCandidateIds = async () => {
    const matches = await db
      .select({ id: kilocode_users.id })
      .from(kilocode_users)
      .where(normalizedEmailBackfillCandidates);
    return matches.map(match => match.id);
  };

  // Every test inserts its own users; clear the table so rows never leak between tests.
  afterEach(async () => {
    await db.delete(kilocode_users);
  });

  it('includes users that are missing normalized_email', async () => {
    const user = await insertTestUser({ normalized_email: null });

    const candidateIds = await selectCandidateIds();

    expect(candidateIds).toContain(user.id);
  });

  it('excludes users that already have normalized_email set', async () => {
    const user = await insertTestUser({ normalized_email: 'user@example.com' });

    const candidateIds = await selectCandidateIds();

    expect(candidateIds).not.toContain(user.id);
  });

  it('does not select newly soft-deleted users because their tombstone email is stored', async () => {
    const user = await insertTestUser({ normalized_email: 'user@example.com' });

    await softDeleteUser(user.id);

    // Re-read the row to check what the soft delete actually persisted.
    const [tombstone] = await db
      .select()
      .from(kilocode_users)
      .where(eq(kilocode_users.id, user.id));
    expect(tombstone.google_user_email).toBe(`deleted-${user.id}@deleted.invalid`);
    expect(tombstone.normalized_email).toBe(`deleted-${user.id}@deleted.invalid`);
    expect(tombstone.blocked_reason).toMatch(/^soft-deleted at /);

    const candidateIds = await selectCandidateIds();

    expect(candidateIds).not.toContain(user.id);
  });

  it('selects and canonicalizes legacy plus-addressed deletion tombstones', async () => {
    // Fixture shaped like a row written by the older `deleted+<id>` tombstone format.
    const legacyId = 'legacy-deleted-user';
    const plusAddressedEmail = `deleted+${legacyId}@deleted.invalid`;
    const user = await insertTestUser({
      id: legacyId,
      google_user_email: plusAddressedEmail,
      normalized_email: 'deleted@deleted.invalid',
      blocked_reason: 'soft-deleted at 2026-01-15T12:00:00.000Z',
    });

    const candidateIds = await selectCandidateIds();

    // normalized_email is populated here, so the row can only match through the
    // legacy-tombstone branch of the predicate.
    expect(candidateIds).toContain(user.id);
    expect(canonicalizeDeletedUserEmail(user.id, plusAddressedEmail)).toBe(
      `deleted-${user.id}@deleted.invalid`
    );
  });

  it('still includes users blocked for other reasons', async () => {
    const user = await insertTestUser({
      normalized_email: null,
      blocked_reason: 'domainblocked',
    });

    const candidateIds = await selectCandidateIds();

    expect(candidateIds).toContain(user.id);
  });
});
31 changes: 21 additions & 10 deletions apps/web/src/app/admin/api/backfills/normalized-email/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import { NextResponse } from 'next/server';
import { getUserFromAuth } from '@/lib/user/server';
import { db } from '@/lib/drizzle';
import { kilocode_users } from '@kilocode/db';
import { isNull, count, sql } from 'drizzle-orm';
import { isNull, count, or, sql } from 'drizzle-orm';
import { normalizeEmail } from '@/lib/utils';
import { canonicalizeDeletedUserEmail } from '@/lib/user/deleted-email';

export type NormalizedEmailCountsResponse = {
missing: number;
Expand All @@ -14,6 +15,11 @@ export type NormalizedEmailBackfillResponse = {
remaining: boolean;
};

// Rows that have never had normalized_email populated.
const missingNormalizedEmail = isNull(kilocode_users.normalized_email);

// Rows whose stored email is exactly the legacy plus-addressed deletion
// tombstone for their own id (`deleted+<id>@deleted.invalid`).
const legacyDeletedTombstone = sql`${kilocode_users.google_user_email} = 'deleted+' || ${kilocode_users.id} || '@deleted.invalid'`;

/**
 * Predicate selecting the users the normalized-email backfill should process:
 * rows missing `normalized_email`, plus legacy deletion tombstones, which are
 * matched regardless of whether their `normalized_email` is set.
 */
export const normalizedEmailBackfillCandidates = or(
  missingNormalizedEmail,
  legacyDeletedTombstone
);

export async function GET(): Promise<
NextResponse<NormalizedEmailCountsResponse | { error: string }>
> {
Expand All @@ -23,7 +29,7 @@ export async function GET(): Promise<
const [result] = await db
.select({ count: count() })
.from(kilocode_users)
.where(isNull(kilocode_users.normalized_email));
.where(normalizedEmailBackfillCandidates);

return NextResponse.json({ missing: result?.count ?? 0 });
}
Expand All @@ -43,23 +49,28 @@ export async function POST(): Promise<
const rows = await db
.select({ id: kilocode_users.id, google_user_email: kilocode_users.google_user_email })
.from(kilocode_users)
.where(isNull(kilocode_users.normalized_email))
.where(normalizedEmailBackfillCandidates)
.limit(BATCH_SIZE);

if (rows.length === 0) break;

const updates = rows.map(row => ({
id: row.id,
normalized_email: normalizeEmail(row.google_user_email),
}));
const updates = rows.map(row => {
const email = canonicalizeDeletedUserEmail(row.id, row.google_user_email);
return {
id: row.id,
google_user_email: email,
normalized_email: normalizeEmail(email),
};
});

await db.execute(sql`
UPDATE ${kilocode_users}
SET normalized_email = email_updates.normalized_email
SET google_user_email = email_updates.google_user_email,
normalized_email = email_updates.normalized_email
FROM (VALUES ${sql.join(
updates.map(u => sql`(${u.id}, ${u.normalized_email})`),
updates.map(u => sql`(${u.id}, ${u.google_user_email}, ${u.normalized_email})`),
sql`, `
)}) AS email_updates(id, normalized_email)
)}) AS email_updates(id, google_user_email, normalized_email)
WHERE ${kilocode_users.id} = email_updates.id
`);

Expand Down
4 changes: 4 additions & 0 deletions apps/web/src/lib/bot-users/bot-user-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { kilocode_users, organization_memberships, type User } from '@kilocode/d
import { eq, and } from 'drizzle-orm';
import { captureException } from '@sentry/nextjs';
import { logExceptInTest, errorExceptInTest } from '@/lib/utils.server';
import { normalizeEmail } from '@/lib/utils';
import { extractEmailDomain } from '@/lib/email-domain';
import crypto from 'crypto';
import type { BotType } from './types';
import { generateBotUserId, generateBotUserEmail, getBotDisplayName } from './types';
Expand Down Expand Up @@ -53,6 +55,8 @@ async function createBotUser(organizationId: string, botType: BotType): Promise<
.values({
id: botId,
google_user_email: botEmail,
normalized_email: normalizeEmail(botEmail),
email_domain: extractEmailDomain(botEmail),
google_user_name: botName,
google_user_image_url:
'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNDgiIGhlaWdodD0iNDgiIHZpZXdCb3g9IjAgMCA0OCA0OCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48Y2lyY2xlIGN4PSIyNCIgY3k9IjI0IiByPSIyNCIgZmlsbD0iIzY2NjY2NiIvPjwvc3ZnPg==', // Gray circle placeholder
Expand Down
1 change: 1 addition & 0 deletions apps/web/src/lib/email-domain.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ describe('extractEmailDomain', () => {
// tldts treats the final label as a public suffix when unknown.
expect(extractEmailDomain('alice@host.madeuptld')).toBe('host.madeuptld');
expect(extractEmailDomain('alice@sub.host.madeuptld')).toBe('host.madeuptld');
expect(extractEmailDomain('bot@kilocode.internal')).toBe('kilocode.internal');
});

it('falls back to `<host>.invalid` when tldts cannot resolve a registrable domain (e.g. IP)', () => {
Expand Down
13 changes: 13 additions & 0 deletions apps/web/src/lib/user/deleted-email.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/** Domain shared by every synthetic address that stands in for a deleted user's email. */
const DELETED_USER_EMAIL_DOMAIN = 'deleted.invalid';

/** Builds `deleted<separator><userId>@<tombstone domain>`. */
function buildDeletedUserEmail(separator: '-' | '+', userId: string): string {
  return `deleted${separator}${userId}@${DELETED_USER_EMAIL_DOMAIN}`;
}

/**
 * Returns the current tombstone address for a deleted user.
 *
 * @param userId - Id of the user the tombstone belongs to.
 * @returns `deleted-<userId>@deleted.invalid`
 */
export function getDeletedUserEmail(userId: string): string {
  return buildDeletedUserEmail('-', userId);
}

/**
 * Returns the older, plus-addressed tombstone address for a deleted user.
 *
 * @param userId - Id of the user the tombstone belongs to.
 * @returns `deleted+<userId>@deleted.invalid`
 */
export function getLegacyDeletedUserEmail(userId: string): string {
  return buildDeletedUserEmail('+', userId);
}

/**
 * Upgrades a legacy tombstone address to the current format.
 *
 * Only an exact, case-sensitive match against the legacy address for this
 * same `userId` is rewritten; any other email is returned untouched.
 *
 * @param userId - Id of the user that owns `email`.
 * @param email - Email currently stored for that user.
 * @returns The current-format tombstone when `email` is the user's legacy
 *   tombstone, otherwise `email` unchanged.
 */
export function canonicalizeDeletedUserEmail(userId: string, email: string): string {
  if (email !== getLegacyDeletedUserEmail(userId)) {
    return email;
  }
  return getDeletedUserEmail(userId);
}
8 changes: 4 additions & 4 deletions apps/web/src/lib/user/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,9 @@ describe('User', () => {

const softDeleted = await findUserById(user.id);
expect(softDeleted).toBeDefined();
expect(softDeleted!.google_user_email).toBe(`deleted+${user.id}@deleted.invalid`);
expect(softDeleted!.normalized_email).toBeNull();
expect(softDeleted!.email_domain).toBeNull();
expect(softDeleted!.google_user_email).toBe(`deleted-${user.id}@deleted.invalid`);
expect(softDeleted!.normalized_email).toBe(`deleted-${user.id}@deleted.invalid`);
expect(softDeleted!.email_domain).toBe('deleted.invalid');
expect(softDeleted!.google_user_name).toBe('Deleted User');
expect(softDeleted!.google_user_image_url).toBe('');
expect(softDeleted!.hosted_domain).toBeNull();
Expand Down Expand Up @@ -1235,7 +1235,7 @@ describe('User', () => {
const anonymized = rows.find(row => row.benchEvalName === 'soft-delete-promoter-eval');
const retained = rows.find(row => row.benchEvalName === 'retained-promoter-eval');

expect(anonymized?.promoterEmail).toBe(`deleted+${promoter.id}@deleted.invalid`);
expect(anonymized?.promoterEmail).toBe(`deleted-${promoter.id}@deleted.invalid`);
expect(retained?.promoterEmail).toBe(otherPromoter.google_user_email);
});

Expand Down
14 changes: 8 additions & 6 deletions apps/web/src/lib/user/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ import {
} from '@/lib/ai-gateway/providerHash';
import { normalizeEmail } from '@/lib/utils';
import { extractEmailDomain } from '@/lib/email-domain';
import { getDeletedUserEmail } from './deleted-email';
import { recordAffiliateAttributionAndQueueParentEvent } from '@/lib/impact/affiliate-events';
import { logImpactReferralDebug } from '@/lib/impact/debug';
import {
Expand Down Expand Up @@ -790,7 +791,7 @@ export class SoftDeletePreconditionError extends Error {
* organization_id references the organization — no direct PII)
*
* What is scrubbed/deleted:
* - PII on the user row (email, name, avatar, urls)
* - PII on the user row (email replaced with a synthetic tombstone; name, avatar, urls cleared)
* - user_auth_provider (auth links with email/avatar)
* - enrichment_data (GitHub/LinkedIn/Clay PII)
* - user_admin_notes
Expand Down Expand Up @@ -891,12 +892,13 @@ export async function softDeleteUser(userId: string) {
});

// ── 1. Anonymize the user row ────────────────────────────────────────
const deletedEmail = getDeletedUserEmail(userId);
await tx
.update(kilocode_users)
.set({
google_user_email: `deleted+${userId}@deleted.invalid`,
normalized_email: null,
email_domain: null,
google_user_email: deletedEmail,
normalized_email: normalizeEmail(deletedEmail),
email_domain: extractEmailDomain(deletedEmail),
google_user_name: 'Deleted User',
google_user_image_url: '',
hosted_domain: null,
Expand Down Expand Up @@ -1165,7 +1167,7 @@ export async function softDeleteUser(userId: string) {

await tx
.update(model_eval_ingestions)
.set({ promoted_by_email: `deleted+${userId}@deleted.invalid` })
.set({ promoted_by_email: deletedEmail })
.where(sql`lower(${model_eval_ingestions.promoted_by_email}) = lower(${originalEmail})`);

// Credit campaigns: strip the creator-admin reference. The campaigns
Expand Down Expand Up @@ -1211,7 +1213,7 @@ export async function softDeleteUser(userId: string) {
);
// Also clear events matched by email directly (covers un-enrolled contributors).
// Use originalEmail captured before the user row was anonymized — the subquery
// would resolve to the already-overwritten deleted+<id>@deleted.invalid address.
// would resolve to the already-overwritten synthetic deletion address.
await tx
.update(contributor_champion_events)
.set({ github_author_email: null })
Expand Down
5 changes: 4 additions & 1 deletion services/webhook-agent-ingest/src/db/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export type BotUserForToken = {
const WEBHOOK_BOT_ID_PREFIX = 'bot-webhook';
const WEBHOOK_BOT_EMAIL_SUFFIX = 'webhook-bot';
const WEBHOOK_BOT_DISPLAY_NAME = 'Webhook Bot';
const BOT_EMAIL_DOMAIN = 'kilocode.internal';
const BOT_AVATAR_PLACEHOLDER =
'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNDgiIGhlaWdodD0iNDgiIHZpZXdCb3g9IjAgMCA0OCA0OCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48Y2lyY2xlIGN4PSIyNCIgY3k9IjI0IiByPSIyNCIgZmlsbD0iIzY2NjY2NiIvPjwvc3ZnPg==';

Expand All @@ -31,7 +32,7 @@ export function generateBotUserId(organizationId: string): string {
}

export function generateBotUserEmail(organizationId: string): string {
return `${WEBHOOK_BOT_EMAIL_SUFFIX}-${organizationId}@kilocode.internal`;
return `${WEBHOOK_BOT_EMAIL_SUFFIX}-${organizationId}@${BOT_EMAIL_DOMAIN}`;
}

function generateApiTokenPepper(): string {
Expand Down Expand Up @@ -145,6 +146,8 @@ export async function ensureBotUserForOrg(db: WorkerDb, orgId: string): Promise<
await db.insert(kilocode_users).values({
id: botId,
google_user_email: botEmail,
normalized_email: botEmail,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: normalized_email is set to the raw botEmail without calling normalizeEmail(), while the web-app bot creation path (bot-user-service.ts:58) consistently calls normalizeEmail(botEmail). Bot emails are already lowercase with no + aliases, so in practice the result is identical today — but the two code paths could silently diverge if generateBotUserEmail ever produces mixed-case output. Consider passing through normalizeEmail(botEmail) here for parity.

email_domain: BOT_EMAIL_DOMAIN,
google_user_name: WEBHOOK_BOT_DISPLAY_NAME,
google_user_image_url: BOT_AVATAR_PLACEHOLDER,
stripe_customer_id: stripeCustomerId,
Expand Down