From 782e448c6c44e80da944a15182e23cdc09d34591 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 19 Apr 2026 10:01:44 +0100 Subject: [PATCH 1/8] docs: PR5 GDPR author erasure design spec --- ...26-04-19-gdpr-pr5-author-erasure-design.md | 222 ++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-19-gdpr-pr5-author-erasure-design.md diff --git a/docs/superpowers/specs/2026-04-19-gdpr-pr5-author-erasure-design.md b/docs/superpowers/specs/2026-04-19-gdpr-pr5-author-erasure-design.md new file mode 100644 index 00000000000..2bfe2c93942 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-gdpr-pr5-author-erasure-design.md @@ -0,0 +1,222 @@ +# PR5 — GDPR Author Erasure (Right to be Forgotten) + +Last of five GDPR PRs (ether/etherpad#6701). Implements anonymisation +of an author's identity — display name, colour, and the token/mapper +bindings that link a real-world session to an `authorID` — while +leaving pad content intact. This is the GDPR-preferred shape for Art. +17 (erasure) because deleting the author's edits would corrupt every +pad they touched. + +## Audit summary + +What links an authorID back to a real person today: + +| DB key | Content | Personal? | +| --- | --- | --- | +| `globalAuthor:<authorID>` | `{name, colorId, timestamp}` plus whatever plugins stamp | **yes** (display name) | +| `token2author:<token>` | `authorID` | **yes** (token is the browser-side secret) | +| `mapper2author:<mapper>` | `authorID` | **yes** (mapper is SSO / API caller identity) | +| `pad:<padID>:chat:<n>` → `ChatMessage` | stored with `authorId` | authorID ref only, no name | +| `pad:<padID>:revs:<n>` / changesets + attrib pool | embedded `author` attrib keyed by `authorID` | authorID ref only, no name | + +Anonymising the first three records severs the link between the +authorID and the person. The changeset/chat references that remain are +opaque and unlinkable without them. 
+ +## Goals + +- Server-side `anonymizeAuthor(authorID)` that: + - zeroes `name`, `colorId` in `globalAuthor:<authorID>` (keeps the + key so changeset references still resolve to "an author" with no + details) + - deletes every `token2author:<token>` entry pointing at the author + - deletes every `mapper2author:<mapper>` entry pointing at the author + - iterates the author's pads and rewrites each pad's in-memory chat + messages so `authorId` becomes `null`, then persists + - leaves pad content, revision history, and attribute pools alone +- Admin REST endpoint `POST /api/<version>/anonymizeAuthor` that wraps the + call; auth uses the existing apikey / JWT admin path. +- Idempotent: calling twice on the same authorID is a no-op. + +## Non-goals + +- Deleting the author's pads. Erasing is shaped as anonymisation, not + deletion — operators who want a pad gone can use PR1 (#7546). +- Rewriting the attribute pool in every pad to drop the author entirely. + Grep of `src/node/utils/padDiff` confirms existing consumers (line + colours, authorship-history sidebar) already handle missing + `globalAuthor:<authorID>.name` by displaying a blank author — the UI + degrades to "an anonymous author" without further changes. +- Rolling up historical chat into one big aggregate. We touch each + message individually, keeping its timestamp and text intact. +- Adding an "undo erasure" path. GDPR erasure is one-way. + +## Design + +### AuthorManager surface + +```typescript +// src/node/db/AuthorManager.ts additions +exports.anonymizeAuthor = async (authorID: string): Promise<{ + affectedPads: number, + removedTokenMappings: number, + removedExternalMappings: number, + clearedChatMessages: number, +}> => { /* ... */ }; +``` + +Pseudocode: + +```typescript +const existing = await db.get(`globalAuthor:${authorID}`); +if (existing == null) return {affectedPads: 0, removedTokenMappings: 0, /* ... */}; + +// 1. 
Redact identity on the globalAuthor record but keep the record +// itself so the authorID is still a valid key for historical data. +await db.set(`globalAuthor:${authorID}`, { + colorId: 0, + name: null, + timestamp: Date.now(), + padIDs: existing.padIDs, // retain pad membership — it is not PII on its own + erased: true, + erasedAt: new Date().toISOString(), +}); + +// 2. Drop token/mapper bindings that point at this author. +let removedTokenMappings = 0; +let removedExternalMappings = 0; +for (const [key, value] of await db.findKeys('token2author:*', null) + .then((keys) => Promise.all(keys.map(async (k) => [k, await db.get(k)] as const)))) { + if (value === authorID) { await db.remove(key); removedTokenMappings++; } +} +for (const [key, value] of await db.findKeys('mapper2author:*', null) + .then((keys) => Promise.all(keys.map(async (k) => [k, await db.get(k)] as const)))) { + if (value === authorID) { await db.remove(key); removedExternalMappings++; } +} + +// 3. Walk the author's pads and null-out chat messages they authored. +const padIDs = existing.padIDs || {}; +let clearedChatMessages = 0; +for (const padID of Object.keys(padIDs)) { + if (!await padManager.doesPadExist(padID)) continue; + const pad = await padManager.getPad(padID); + for (let i = 0; i < pad.chatHead + 1; i++) { + const key = `pad:${padID}:chat:${i}`; + const chat = await db.get(key); + if (chat && chat.authorId === authorID) { + chat.authorId = null; + await db.set(key, chat); + clearedChatMessages++; + } + } +} + +return { + affectedPads: Object.keys(padIDs).length, + removedTokenMappings, + removedExternalMappings, + clearedChatMessages, +}; +``` + +Notes: +- `db.findKeys` exists in etherpad's DB abstraction (used by + `Pad.listAuthors` etc.). If unavailable for a given ueberdb driver, + fall back to scanning via the pad lists we already have — the + common databases (`dirty`, `sqlite`, `postgres`, `redis`) all + support it. +- We never edit revision changesets or the attribute pool. 
A previously + anonymised author remains present in the pool under their opaque + `authorID`; without the `globalAuthor.name` the UI shows a blank + author strip, which is the desired degradation. + +### REST API + +Extend the existing API versioning map in +`src/node/handler/APIHandler.ts`: + +```typescript +version['1.3.1'] = { + ...version['1.3.0'], + anonymizeAuthor: ['authorID'], +}; +exports.latestApiVersion = '1.3.1'; +``` + +In `src/node/db/API.ts`: + +```typescript +exports.anonymizeAuthor = async (authorID: string) => { + if (!authorID) throw new CustomError('authorID is required', 'apierror'); + return await authorManager.anonymizeAuthor(authorID); +}; +``` + +Auth: the existing `APIHandler.handle` already enforces apikey or JWT +admin auth before dispatching to `api[functionName]`, so no extra +gating needed. + +### OpenAPI + +`RestAPI.ts` builds the OpenAPI document from `APIHandler.version`. +Because `anonymizeAuthor` is a new entry in the version map, the +generated OpenAPI definition picks it up automatically — no manual +edits required. + +### Docs + +- Add a "Right to erasure" section to `doc/privacy.md` describing: + - what happens to the author record, + - what is kept (pad content, revision history, opaque authorID), + - how operators trigger it (`POST /api/1.3.1/anonymizeAuthor?authorID=...`). +- Add an admin-facing one-liner to `doc/api/http_api.md` referencing + the new endpoint if the file exists. + +## Testing + +### Unit + +`src/tests/backend/specs/anonymizeAuthor.ts`: + +1. Seed a fresh author via `authorManager.createAuthor('Alice')`. + Confirm `globalAuthor.name === 'Alice'`, a token mapping exists, + a mapper mapping exists (use `setAuthorName` + `getAuthorId` to + create them). +2. Call `anonymizeAuthor(authorID)`. +3. Assert: + - `globalAuthor:` still exists with `{name: null, colorId: 0, erased: true}`. + - `token2author:` deleted. + - `mapper2author:` deleted. + - Second call is a no-op and returns zero counters. 
+ +### REST integration + +`src/tests/backend/specs/api/anonymizeAuthor.ts`: + +1. `createAuthor` via API, get `authorID`. +2. `POST anonymizeAuthor?authorID=<authorID>` with JWT admin token → expect + `code: 0, data: {affectedPads, removedTokenMappings, ...}`. +3. `getAuthorName(authorID)` → returns `null`. +4. Call `anonymizeAuthor` with missing `authorID` → returns + `code: 1, message: 'authorID is required'`. + +### Chat regression + +Light touch: create a pad, chat as the author, call anonymizeAuthor, +load `getChatHistory`, confirm the message text is unchanged and +`authorId` is `null`. + +## Risk and migration + +- `padIDs` in the original `globalAuthor` record is kept intact — + needed to find which pads need chat-scrub, and not personally + identifying on its own (pad IDs are user-chosen strings; they can + point to named URLs but that's an operator-level concern). +- Idempotent: erased records carry `erased: true`, so the helper + short-circuits on subsequent calls without re-walking pads. +- If an author was active on thousands of pads, the chat loop can be + slow. Document the worst-case cost; real-world GDPR requests are + single-digit frequency, so a one-time scan is acceptable. +- `ueberdb` `findKeys` has per-driver caveats. The unit test uses the + `dirty` driver which supports the glob. The REST test runs under + the same driver via `common.init()`. 
From 488fef7e1daf942106322ee564260556c4d7309d Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 19 Apr 2026 10:03:37 +0100 Subject: [PATCH 2/8] docs: PR5 GDPR author erasure implementation plan --- .../2026-04-19-gdpr-pr5-author-erasure.md | 510 ++++++++++++++++++ 1 file changed, 510 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-19-gdpr-pr5-author-erasure.md diff --git a/docs/superpowers/plans/2026-04-19-gdpr-pr5-author-erasure.md b/docs/superpowers/plans/2026-04-19-gdpr-pr5-author-erasure.md new file mode 100644 index 00000000000..d533cea70e8 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-gdpr-pr5-author-erasure.md @@ -0,0 +1,510 @@ +# GDPR PR5 — Author Erasure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Implement GDPR Art. 17 "right to be forgotten" for an anonymous author — zero the display identity on `globalAuthor:`, delete the `token2author:*` and `mapper2author:*` bindings that resolve a real person to the opaque authorID, and null-out chat authorship for messages the author posted. Pad text, revision history, and attribute pools are kept intact. + +**Architecture:** A new `authorManager.anonymizeAuthor(authorID)` that owns the full sweep, a thin `API.ts` wrapper that plugs into the existing REST auth pipeline, a new `anonymizeAuthor` entry in `APIHandler.version['1.3.1']`. Tests: unit for the manager, REST integration with the project's JWT admin-auth pattern, chat-round-trip regression. + +**Tech Stack:** TypeScript, ueberdb (via the existing `DB.db.findKeys` helper), Mocha + supertest for backend tests. 
+ +--- + +## File Structure + +**Modified:** +- `src/node/db/AuthorManager.ts` — add `anonymizeAuthor` +- `src/node/db/API.ts` — expose it on the programmatic API +- `src/node/handler/APIHandler.ts` — register version `1.3.1`, bump `latestApiVersion` +- `doc/privacy.md` — new "Right to erasure" section (file was created by PR4 #7549; we append) + +**Created:** +- `src/tests/backend/specs/anonymizeAuthor.ts` — AuthorManager unit tests +- `src/tests/backend/specs/api/anonymizeAuthor.ts` — REST integration tests + +--- + +## Task 1: `anonymizeAuthor` on AuthorManager + +**Files:** +- Modify: `src/node/db/AuthorManager.ts` — append the exported function + +- [ ] **Step 1: Read `AuthorManager.ts` to confirm existing exports** + +Run: `grep -n "exports\." src/node/db/AuthorManager.ts` + +Look for `exports.listPadsOfAuthor`, `exports.addPad`, `exports.removePad`. They're the closest neighbours and share the `padIDs` traversal idea. + +- [ ] **Step 2: Import `db` and `padManager` already in file — just append the function** + +At the bottom of `src/node/db/AuthorManager.ts`: + +```typescript +/** + * GDPR Art. 17: anonymise an author. Zeroes the display identity on + * globalAuthor:, deletes the token/mapper bindings that link a + * person to this authorID, and nulls authorship on chat messages they + * posted. Leaves pad content and revision history intact — the changeset + * references are opaque without the identity record, so the link to the + * real person is severed even though the bytes survive. + * + * Idempotent: once `erased: true` is set on the author record, subsequent + * calls short-circuit and return zero counters. 
+ */ +exports.anonymizeAuthor = async (authorID: string): Promise<{ + affectedPads: number, + removedTokenMappings: number, + removedExternalMappings: number, + clearedChatMessages: number, +}> => { + const existing = await db.get(`globalAuthor:${authorID}`); + if (existing == null || existing.erased) { + return { + affectedPads: 0, + removedTokenMappings: 0, + removedExternalMappings: 0, + clearedChatMessages: 0, + }; + } + + // Drop the token/mapper mappings first, before zeroing the display + // record, so a concurrent getAuthorId() can no longer resolve this + // author through its old bindings mid-erasure. + let removedTokenMappings = 0; + const tokenKeys = await db.findKeys('token2author:*', null); + for (const key of tokenKeys) { + if (await db.get(key) === authorID) { + await db.remove(key); + removedTokenMappings++; + } + } + let removedExternalMappings = 0; + const mapperKeys = await db.findKeys('mapper2author:*', null); + for (const key of mapperKeys) { + if (await db.get(key) === authorID) { + await db.remove(key); + removedExternalMappings++; + } + } + + // Zero the display identity but keep padIDs so future maintenance (or a + // pad-delete batch) can still find the set of pads this authorID touched. + await db.set(`globalAuthor:${authorID}`, { + colorId: 0, + name: null, + timestamp: Date.now(), + padIDs: existing.padIDs || {}, + erased: true, + erasedAt: new Date().toISOString(), + }); + + // Null authorship on chat messages the author posted. 
+ const padIDs = Object.keys(existing.padIDs || {}); + let clearedChatMessages = 0; + for (const padID of padIDs) { + if (!await padManager.doesPadExist(padID)) continue; + const pad = await padManager.getPad(padID); + const chatHead = pad.chatHead; + if (typeof chatHead !== 'number' || chatHead < 0) continue; + for (let i = 0; i <= chatHead; i++) { + const chatKey = `pad:${padID}:chat:${i}`; + const msg = await db.get(chatKey); + if (msg != null && msg.authorId === authorID) { + msg.authorId = null; + await db.set(chatKey, msg); + clearedChatMessages++; + } + } + } + + return { + affectedPads: padIDs.length, + removedTokenMappings, + removedExternalMappings, + clearedChatMessages, + }; +}; +``` + +- [ ] **Step 3: Type check** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` +Expected: exit 0. + +- [ ] **Step 4: Commit** + +```bash +git add src/node/db/AuthorManager.ts +git commit -m "feat(gdpr): AuthorManager.anonymizeAuthor — Art. 17 erasure" +``` + +--- + +## Task 2: Unit tests for `anonymizeAuthor` + +**Files:** +- Create: `src/tests/backend/specs/anonymizeAuthor.ts` + +- [ ] **Step 1: Write the test** + +```typescript +'use strict'; + +import {strict as assert} from 'assert'; + +const common = require('../common'); +const authorManager = require('../../../node/db/AuthorManager'); +const db = require('../../../node/db/DB'); + +describe(__filename, function () { + before(async function () { + this.timeout(60000); + await common.init(); + }); + + it('zeroes the display identity on globalAuthor:', async function () { + const {authorID} = await authorManager.createAuthorIfNotExistsFor( + `mapper-${Date.now()}-${Math.random()}`, 'Alice'); + assert.equal(await authorManager.getAuthorName(authorID), 'Alice'); + + const res = await authorManager.anonymizeAuthor(authorID); + assert.ok(res.removedExternalMappings >= 1); + + const record = await db.db.get(`globalAuthor:${authorID}`); + assert.equal(record.name, null); + assert.equal(record.colorId, 0); + 
assert.equal(record.erased, true); + assert.ok(typeof record.erasedAt === 'string'); + }); + + it('drops token2author and mapper2author mappings pointing at the author', + async function () { + const mapper = `mapper-${Date.now()}-${Math.random()}`; + const {authorID} = await authorManager.createAuthorIfNotExistsFor( + mapper, 'Bob'); + // Create a token mapping by calling getAuthorId with a new token. + const token = `t.${Date.now().toString(36)}${Math.random().toString(36).slice(2)}`; + // getAuthorId takes (token, user); first call seeds token2author:. + await authorManager.getAuthorId(token, {}); + // We need a token that resolves to *this* authorID. Do it by making + // the token's first use deterministic: set token2author: ourselves. + await db.db.set(`token2author:${token}`, authorID); + + assert.equal(await db.db.get(`token2author:${token}`), authorID); + assert.equal(await db.db.get(`mapper2author:${mapper}`), authorID); + + const res = await authorManager.anonymizeAuthor(authorID); + assert.ok(res.removedTokenMappings >= 1); + assert.ok(res.removedExternalMappings >= 1); + assert.equal(await db.db.get(`token2author:${token}`), null); + assert.equal(await db.db.get(`mapper2author:${mapper}`), null); + }); + + it('is idempotent — second call returns zero counters', async function () { + const {authorID} = await authorManager.createAuthorIfNotExistsFor( + `mapper-${Date.now()}-${Math.random()}`, 'Carol'); + await authorManager.anonymizeAuthor(authorID); + const second = await authorManager.anonymizeAuthor(authorID); + assert.deepEqual(second, { + affectedPads: 0, + removedTokenMappings: 0, + removedExternalMappings: 0, + clearedChatMessages: 0, + }); + }); + + it('returns zero counters for an unknown authorID', async function () { + const res = await authorManager.anonymizeAuthor('a.does-not-exist'); + assert.deepEqual(res, { + affectedPads: 0, + removedTokenMappings: 0, + removedExternalMappings: 0, + clearedChatMessages: 0, + }); + }); +}); +``` + +- [ ] 
**Step 2: Run** + +Run: `pnpm --filter ep_etherpad-lite exec mocha --require tsx/cjs tests/backend/specs/anonymizeAuthor.ts --timeout 60000` +Expected: 4 tests pass. + +- [ ] **Step 3: Commit** + +```bash +git add src/tests/backend/specs/anonymizeAuthor.ts +git commit -m "test(gdpr): AuthorManager.anonymizeAuthor — identity + mappings + idempotence" +``` + +--- + +## Task 3: Expose on REST API + +**Files:** +- Modify: `src/node/db/API.ts` — add the programmatic `exports.anonymizeAuthor` +- Modify: `src/node/handler/APIHandler.ts` — register version 1.3.1 + +- [ ] **Step 1: Add the API.ts entry** + +Open `src/node/db/API.ts`. Near the other author-surface exports +(`exports.createAuthor`, `exports.getAuthorName`) append: + +```typescript +/** + * anonymizeAuthor(authorID) — GDPR Art. 17 erasure. See doc/privacy.md. + * + * @param {String} authorID + * @returns {Promise<{affectedPads:number, removedTokenMappings:number, + * removedExternalMappings:number, clearedChatMessages:number}>} + */ +exports.anonymizeAuthor = async (authorID: string) => { + if (!authorID || typeof authorID !== 'string') { + throw new CustomError('authorID is required', 'apierror'); + } + return await authorManager.anonymizeAuthor(authorID); +}; +``` + +(`CustomError` and `authorManager` are already imported at the top of +`API.ts`.) + +- [ ] **Step 2: Register a new API version** + +In `src/node/handler/APIHandler.ts`, append a new version entry below +`version['1.3.0']`: + +```typescript +version['1.3.1'] = { + ...version['1.3.0'], + anonymizeAuthor: ['authorID'], +}; + +// set the latest available API version here +exports.latestApiVersion = '1.3.1'; +``` + +Replace the existing `exports.latestApiVersion = '1.3.0';` line with +the `1.3.1` string so the REST `/api/` endpoint advertises it. 
+ +- [ ] **Step 3: Type check + commit** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` + +```bash +git add src/node/db/API.ts src/node/handler/APIHandler.ts +git commit -m "feat(gdpr): REST anonymizeAuthor on API version 1.3.1" +``` + +--- + +## Task 4: REST integration test + +**Files:** +- Create: `src/tests/backend/specs/api/anonymizeAuthor.ts` + +- [ ] **Step 1: Write the spec** + +```typescript +'use strict'; + +import {strict as assert} from 'assert'; + +const common = require('../../common'); + +let agent: any; +let apiVersion = '1.3.1'; +const endPoint = (point: string) => `/api/${apiVersion}/${point}`; + +const callApi = async (point: string, query: Record = {}) => { + const qs = new URLSearchParams(query).toString(); + const path = qs ? `${endPoint(point)}?${qs}` : endPoint(point); + return await agent.get(path) + .set('authorization', await common.generateJWTToken()) + .expect(200) + .expect('Content-Type', /json/); +}; + +describe(__filename, function () { + before(async function () { + this.timeout(60000); + agent = await common.init(); + const res = await agent.get('/api/').expect(200); + apiVersion = res.body.currentVersion; + }); + + it('anonymizeAuthor zeroes the author and returns counters', async function () { + const create = await callApi('createAuthor', {name: 'Alice'}); + assert.equal(create.body.code, 0); + const authorID = create.body.data.authorID; + + const res = await callApi('anonymizeAuthor', {authorID}); + assert.equal(res.body.code, 0, JSON.stringify(res.body)); + assert.ok(res.body.data.affectedPads >= 0); + + const name = await callApi('getAuthorName', {authorID}); + assert.equal(name.body.data.authorName, null); + }); + + it('anonymizeAuthor with missing authorID returns an error', async function () { + const res = await agent.get(`${endPoint('anonymizeAuthor')}?authorID=`) + .set('authorization', await common.generateJWTToken()) + .expect(200) + .expect('Content-Type', /json/); + assert.equal(res.body.code, 1); + 
assert.match(res.body.message, /authorID is required/); + }); +}); +``` + +- [ ] **Step 2: Run** + +Run: `cd src && NODE_ENV=production pnpm exec mocha --require tsx/cjs tests/backend/specs/api/anonymizeAuthor.ts --timeout 60000` +Expected: 2 tests pass. + +- [ ] **Step 3: Commit** + +```bash +git add src/tests/backend/specs/api/anonymizeAuthor.ts +git commit -m "test(gdpr): REST anonymizeAuthor end-to-end" +``` + +--- + +## Task 5: Docs + +**Files:** +- Modify: `doc/privacy.md` — add a "Right to erasure" section + +- [ ] **Step 1: Check whether the file exists on this branch** + +`doc/privacy.md` is created in PR2 (#7547) and PR4 (#7549). If the +branch doesn't have it yet, create a minimal stub first: + +```bash +ls doc/privacy.md || cat > doc/privacy.md <<'EOF' +# Privacy + +See [cookies.md](cookies.md) for the cookie list and the GDPR work +tracked in [ether/etherpad#6701](https://github.com/ether/etherpad/issues/6701). +EOF +``` + +- [ ] **Step 2: Append the erasure section** + +Append: + +```markdown +## Right to erasure (GDPR Art. 17) + +Etherpad anonymises an author rather than deleting their changesets +(deletion would corrupt every pad they contributed to). Operators +trigger erasure via the admin REST API: + +```bash +curl -X POST \ + -H "Authorization: Bearer <admin-jwt>" \ + "https://<host>/api/1.3.1/anonymizeAuthor?authorID=a.XXXXXXXXXXXXXX" +``` + +What the call does: + +- Zeros `name` and `colorId` on the `globalAuthor:<authorID>` record + (kept as an opaque stub so changeset references still resolve to + "an author" with no details). +- Deletes every `token2author:<token>` and `mapper2author:<mapper>` + binding that pointed at this author. Once removed, a new session + with the same token starts a fresh anonymous identity. +- Nulls `authorId` on chat messages the author posted; message text + and timestamps are unchanged. + +What it does not do: + +- Delete pad content, revisions, or the attribute pool. 
If a pad + itself should also be erased, use the pad-deletion token flow + (PR1, `deletePad`). +- Touch other authors' edits. + +The call is idempotent: calling it twice on the same authorID +short-circuits the second time. +``` + +- [ ] **Step 3: Commit** + +```bash +git add doc/privacy.md +git commit -m "docs(gdpr): right-to-erasure section + anonymizeAuthor example" +``` + +--- + +## Task 6: Verify + push + open PR + +- [ ] **Step 1: Type check** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` +Expected: exit 0. + +- [ ] **Step 2: Full backend test sweep** + +```bash +cd src && NODE_ENV=production pnpm exec mocha --require tsx/cjs \ + tests/backend/specs/anonymizeAuthor.ts \ + tests/backend/specs/api/anonymizeAuthor.ts \ + tests/backend/specs/api/api.ts --timeout 60000 +``` + +Expected: all pass. + +- [ ] **Step 3: Push + open PR** + +```bash +git push origin feat-gdpr-author-erasure +gh pr create --repo ether/etherpad --base develop --head feat-gdpr-author-erasure \ + --title "feat(gdpr): author erasure (PR5 of #6701)" --body "$(cat <<'EOF' +## Summary +- New `authorManager.anonymizeAuthor(authorID)` zeroes the display identity on `globalAuthor:`, deletes every `token2author:*` and `mapper2author:*` binding that points at the author, and nulls `authorId` on chat messages they posted. Pad content, revisions, and attribute pool are intact. +- New REST endpoint `POST /api/1.3.1/anonymizeAuthor?authorID=…` — admin-auth via the existing apikey/JWT pipeline. +- Idempotent. Zero counters on second call. +- `doc/privacy.md` explains what the call does and does not do. + +Final PR of the #6701 GDPR work. PR1 #7546 (deletion), PR2 #7547 (IP/privacy audit), PR3 #7548 (HttpOnly author cookie), PR4 #7549 (privacy banner) complete the set. 
+ +Design: `docs/superpowers/specs/2026-04-19-gdpr-pr5-author-erasure-design.md` +Plan: `docs/superpowers/plans/2026-04-19-gdpr-pr5-author-erasure.md` + +## Test plan +- [x] ts-check +- [x] AuthorManager unit — identity zeroing, mappings removal, idempotence, unknown authorID +- [x] REST — successful erasure + missing-authorID error path +EOF +)" +``` + +- [ ] **Step 4: Monitor CI** + +Run: `gh pr checks --repo ether/etherpad` + +--- + +## Self-Review + +**Spec coverage:** + +| Spec section | Task | +| --- | --- | +| `globalAuthor:` zeroing + `erased: true` | 1 | +| `token2author:*` / `mapper2author:*` deletion | 1 | +| Chat `authorId` null-out | 1 | +| Idempotent second call | 1, 2 | +| REST endpoint + OpenAPI pickup via version map | 3 | +| Unit tests | 2 | +| REST integration tests | 4 | +| Docs | 5 | + +**Placeholders:** none. + +**Type consistency:** +- Return shape `{affectedPads, removedTokenMappings, removedExternalMappings, clearedChatMessages}` consistent across Tasks 1, 2, 4. +- `anonymizeAuthor(authorID: string)` signature identical in all three tasks. +- API version string `'1.3.1'` used only in Task 3 and referenced in Task 4 / Task 6 docs. From 73ae26e2436b9317b08eac55c49e565ff209603c Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 19 Apr 2026 10:05:38 +0100 Subject: [PATCH 3/8] =?UTF-8?q?feat(gdpr):=20AuthorManager.anonymizeAuthor?= =?UTF-8?q?=20=E2=80=94=20Art.=2017=20erasure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/node/db/AuthorManager.ts | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/node/db/AuthorManager.ts b/src/node/db/AuthorManager.ts index 4bcfa2c0d4a..4df9d0d83bd 100644 --- a/src/node/db/AuthorManager.ts +++ b/src/node/db/AuthorManager.ts @@ -313,3 +313,90 @@ exports.removePad = async (authorID: string, padID: string) => { await db.set(`globalAuthor:${authorID}`, author); } }; + +/** + * GDPR Art. 17: anonymise an author. 
Zeroes the display identity on + * `globalAuthor:`, deletes the token/mapper bindings that link a + * person to this authorID, and nulls authorship on chat messages they + * posted. Leaves pad content, revisions, and attribute pools intact — + * changeset references are opaque without the identity record, so the + * link to the real person is severed even though the bytes survive. + * + * Idempotent: once `erased: true` is set on the author record, subsequent + * calls short-circuit and return zero counters. + */ +exports.anonymizeAuthor = async (authorID: string): Promise<{ + affectedPads: number, + removedTokenMappings: number, + removedExternalMappings: number, + clearedChatMessages: number, +}> => { + // Lazy-require to dodge the AuthorManager ↔ PadManager ↔ Pad cycle. + const padManager = require('./PadManager'); + const existing = await db.get(`globalAuthor:${authorID}`); + if (existing == null || existing.erased) { + return { + affectedPads: 0, + removedTokenMappings: 0, + removedExternalMappings: 0, + clearedChatMessages: 0, + }; + } + + // Drop the token/mapper mappings first, before zeroing the display + // record, so a concurrent getAuthorId() can no longer resolve this + // author through its old bindings mid-erasure. + let removedTokenMappings = 0; + const tokenKeys: string[] = await db.findKeys('token2author:*', null); + for (const key of tokenKeys) { + if (await db.get(key) === authorID) { + await db.remove(key); + removedTokenMappings++; + } + } + let removedExternalMappings = 0; + const mapperKeys: string[] = await db.findKeys('mapper2author:*', null); + for (const key of mapperKeys) { + if (await db.get(key) === authorID) { + await db.remove(key); + removedExternalMappings++; + } + } + + // Zero the display identity. Keep `padIDs` so future maintenance (or a + // pad-delete batch) can still find the set of pads this authorID touched. 
+ await db.set(`globalAuthor:${authorID}`, { + colorId: 0, + name: null, + timestamp: Date.now(), + padIDs: existing.padIDs || {}, + erased: true, + erasedAt: new Date().toISOString(), + }); + + // Null authorship on chat messages the author posted. + const padIDs = Object.keys(existing.padIDs || {}); + let clearedChatMessages = 0; + for (const padID of padIDs) { + if (!await padManager.doesPadExist(padID)) continue; + const pad = await padManager.getPad(padID); + const chatHead = pad.chatHead; + if (typeof chatHead !== 'number' || chatHead < 0) continue; + for (let i = 0; i <= chatHead; i++) { + const chatKey = `pad:${padID}:chat:${i}`; + const msg = await db.get(chatKey); + if (msg != null && msg.authorId === authorID) { + msg.authorId = null; + await db.set(chatKey, msg); + clearedChatMessages++; + } + } + } + + return { + affectedPads: padIDs.length, + removedTokenMappings, + removedExternalMappings, + clearedChatMessages, + }; +}; From 63c2261c8272a1dbf9fc7b30e8a5d9c2195604f7 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 19 Apr 2026 10:06:54 +0100 Subject: [PATCH 4/8] test(gdpr): AuthorManager.anonymizeAuthor unit tests --- src/tests/backend/specs/anonymizeAuthor.ts | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 src/tests/backend/specs/anonymizeAuthor.ts diff --git a/src/tests/backend/specs/anonymizeAuthor.ts b/src/tests/backend/specs/anonymizeAuthor.ts new file mode 100644 index 00000000000..bdcf9e2fecf --- /dev/null +++ b/src/tests/backend/specs/anonymizeAuthor.ts @@ -0,0 +1,74 @@ +'use strict'; + +import {strict as assert} from 'assert'; + +const common = require('../common'); +const authorManager = require('../../../node/db/AuthorManager'); +const DB = require('../../../node/db/DB'); + +describe(__filename, function () { + before(async function () { + this.timeout(60000); + await common.init(); + }); + + it('zeroes the display identity on globalAuthor:', async function () { + const mapper = 
`mapper-${Date.now()}-${Math.random().toString(36).slice(2)}`; + const {authorID} = await authorManager.createAuthorIfNotExistsFor(mapper, 'Alice'); + assert.equal(await authorManager.getAuthorName(authorID), 'Alice'); + + const res = await authorManager.anonymizeAuthor(authorID); + assert.ok(res.removedExternalMappings >= 1, + `removedExternalMappings=${res.removedExternalMappings}`); + + const record = await DB.db.get(`globalAuthor:${authorID}`); + assert.equal(record.name, null); + assert.equal(record.colorId, 0); + assert.equal(record.erased, true); + assert.ok(typeof record.erasedAt === 'string'); + }); + + it('drops token2author and mapper2author mappings pointing at the author', + async function () { + const mapper = `mapper-${Date.now()}-${Math.random().toString(36).slice(2)}`; + const {authorID} = await authorManager.createAuthorIfNotExistsFor(mapper, 'Bob'); + const token = + `t.${Date.now().toString(36)}${Math.random().toString(36).slice(2)}`; + // Seed a token2author: → authorID mapping directly so the test + // does not depend on getAuthorId creating a fresh author. 
+        await DB.db.set(`token2author:${token}`, authorID);
+
+        assert.equal(await DB.db.get(`token2author:${token}`), authorID);
+        assert.equal(await DB.db.get(`mapper2author:${mapper}`), authorID);
+
+        const res = await authorManager.anonymizeAuthor(authorID);
+        assert.ok(res.removedTokenMappings >= 1,
+            `removedTokenMappings=${res.removedTokenMappings}`);
+        assert.ok(res.removedExternalMappings >= 1);
+        assert.ok((await DB.db.get(`token2author:${token}`)) == null);
+        assert.ok((await DB.db.get(`mapper2author:${mapper}`)) == null);
+      });
+
+  it('is idempotent — second call returns zero counters', async function () {
+    const mapper = `mapper-${Date.now()}-${Math.random().toString(36).slice(2)}`;
+    const {authorID} = await authorManager.createAuthorIfNotExistsFor(mapper, 'Carol');
+    await authorManager.anonymizeAuthor(authorID);
+    const second = await authorManager.anonymizeAuthor(authorID);
+    assert.deepEqual(second, {
+      affectedPads: 0,
+      removedTokenMappings: 0,
+      removedExternalMappings: 0,
+      clearedChatMessages: 0,
+    });
+  });
+
+  it('returns zero counters for an unknown authorID', async function () {
+    const res = await authorManager.anonymizeAuthor('a.does-not-exist');
+    assert.deepEqual(res, {
+      affectedPads: 0,
+      removedTokenMappings: 0,
+      removedExternalMappings: 0,
+      clearedChatMessages: 0,
+    });
+  });
+});
From 7bce1944cd0ef18a6d24da81ed7abb5051e0de8e Mon Sep 17 00:00:00 2001
From: John McLear
Date: Sun, 19 Apr 2026 10:07:48 +0100
Subject: [PATCH 5/8] feat(gdpr): REST anonymizeAuthor on API version 1.3.1

---
 src/node/db/API.ts             | 14 ++++++++++++++
 src/node/handler/APIHandler.ts |  7 ++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/node/db/API.ts b/src/node/db/API.ts
index 9ca5ca03c4b..bc30c50f114 100644
--- a/src/node/db/API.ts
+++ b/src/node/db/API.ts
@@ -61,6 +61,20 @@ exports.listAllPads = padManager.listAllPads;
 exports.createAuthor = authorManager.createAuthor;
 exports.createAuthorIfNotExistsFor = authorManager.createAuthorIfNotExistsFor;
 exports.getAuthorName = authorManager.getAuthorName;
+
+/**
+ * anonymizeAuthor(authorID) — GDPR Art. 17 erasure. See doc/privacy.md.
+ *
+ * Returns counters describing what was touched:
+ *   {affectedPads, removedTokenMappings, removedExternalMappings,
+ *    clearedChatMessages}.
+ */
+exports.anonymizeAuthor = async (authorID: string) => {
+  if (!authorID || typeof authorID !== 'string') {
+    throw new CustomError('authorID is required', 'apierror');
+  }
+  return await authorManager.anonymizeAuthor(authorID);
+};
 exports.listPadsOfAuthor = authorManager.listPadsOfAuthor;
 exports.padUsers = padMessageHandler.padUsers;
 exports.padUsersCount = padMessageHandler.padUsersCount;
diff --git a/src/node/handler/APIHandler.ts b/src/node/handler/APIHandler.ts
index 32ce9d1189a..86dfabd0fe7 100644
--- a/src/node/handler/APIHandler.ts
+++ b/src/node/handler/APIHandler.ts
@@ -143,8 +143,13 @@ version['1.3.0'] = {
 };
 
 
+version['1.3.1'] = {
+  ...version['1.3.0'],
+  anonymizeAuthor: ['authorID'],
+};
+
 // set the latest available API version here
-exports.latestApiVersion = '1.3.0';
+exports.latestApiVersion = '1.3.1';
 
 // exports the versions so it can be used by the new Swagger endpoint
 exports.version = version;
From 372d618a648d061515c29b830b69681e40dd3813 Mon Sep 17 00:00:00 2001
From: John McLear
Date: Sun, 19 Apr 2026 10:09:10 +0100
Subject: [PATCH 6/8] test(gdpr): REST anonymizeAuthor end-to-end

---
 .../backend/specs/api/anonymizeAuthor.ts      | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 src/tests/backend/specs/api/anonymizeAuthor.ts

diff --git a/src/tests/backend/specs/api/anonymizeAuthor.ts b/src/tests/backend/specs/api/anonymizeAuthor.ts
new file mode 100644
index 00000000000..b273f8c4018
--- /dev/null
+++ b/src/tests/backend/specs/api/anonymizeAuthor.ts
@@ -0,0 +1,51 @@
+'use strict';
+
+import {strict as assert} from 'assert';
+
+const common = require('../../common');
+
+let agent: any;
+let apiVersion = 1;
+const endPoint = (point: string) => `/api/${apiVersion}/${point}`;
+
+const callApi = async (point: string, query: Record<string, string> = {}) => {
+  const qs = new URLSearchParams(query).toString();
+  const path = qs ? `${endPoint(point)}?${qs}` : endPoint(point);
+  return await agent.get(path)
+      .set('authorization', await common.generateJWTToken())
+      .expect(200)
+      .expect('Content-Type', /json/);
+};
+
+describe(__filename, function () {
+  before(async function () {
+    this.timeout(60000);
+    agent = await common.init();
+    const res = await agent.get('/api/').expect(200);
+    apiVersion = res.body.currentVersion;
+  });
+
+  it('anonymizeAuthor zeroes the author and returns counters', async function () {
+    const create = await callApi('createAuthor', {name: 'Alice'});
+    assert.equal(create.body.code, 0);
+    const authorID = create.body.data.authorID;
+
+    const res = await callApi('anonymizeAuthor', {authorID});
+    assert.equal(res.body.code, 0, JSON.stringify(res.body));
+    assert.ok(res.body.data.affectedPads >= 0);
+
+    const name = await callApi('getAuthorName', {authorID});
+    // getAuthorName returns the raw string/null directly in `data`.
+    // Post-erasure, the name is null.
+    assert.equal(name.body.data, null);
+  });
+
+  it('anonymizeAuthor with missing authorID returns an error', async function () {
+    const res = await agent.get(`${endPoint('anonymizeAuthor')}?authorID=`)
+        .set('authorization', await common.generateJWTToken())
+        .expect(200)
+        .expect('Content-Type', /json/);
+    assert.equal(res.body.code, 1);
+    assert.match(res.body.message, /authorID is required/);
+  });
+});
From f250f572d8c6688ae3f5106d8e04b2dc990386ce Mon Sep 17 00:00:00 2001
From: John McLear
Date: Sun, 19 Apr 2026 10:09:45 +0100
Subject: [PATCH 7/8] docs(gdpr): right-to-erasure section + anonymizeAuthor example

---
 doc/privacy.md | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 doc/privacy.md

diff --git a/doc/privacy.md b/doc/privacy.md
new file mode 100644
index 00000000000..49912eda55e
--- /dev/null
+++ b/doc/privacy.md
@@ -0,0 +1,37 @@
+# Privacy
+
+See [cookies.md](cookies.md) for the cookie list and the GDPR work
+tracked in [ether/etherpad#6701](https://github.com/ether/etherpad/issues/6701).
+
+## Right to erasure (GDPR Art. 17)
+
+Etherpad anonymises an author rather than deleting their changesets
+(deletion would corrupt every pad they contributed to). Operators
+trigger erasure via the admin REST API:
+
+```bash
+curl -X POST \
+  -H "Authorization: Bearer <token>" \
+  "https://<host>/api/1.3.1/anonymizeAuthor?authorID=a.XXXXXXXXXXXXXX"
+```
+
+What the call does:
+
+- Zeros `name` and `colorId` on the `globalAuthor:<authorID>` record
+  (kept as an opaque stub so changeset references still resolve to
+  "an author" with no details).
+- Deletes every `token2author:<token>` and `mapper2author:<mapper>`
+  binding that pointed at this author. Once removed, a new session
+  with the same token starts a fresh anonymous identity.
+- Nulls `authorId` on chat messages the author posted; message text
+  and timestamps are unchanged.
+
+What it does not do:
+
+- Delete pad content, revisions, or the attribute pool. If a pad
+  itself should also be erased, use the pad-deletion token flow
+  (PR1, `deletePad`).
+- Touch other authors' edits.
+
+The call is idempotent: calling it twice on the same authorID
+short-circuits the second time and returns zero counters.
From 16cd84a778524edf25774bf87691918763c6f8ea Mon Sep 17 00:00:00 2001
From: John McLear
Date: Sun, 19 Apr 2026 11:27:58 +0100
Subject: [PATCH 8/8] fix(gdpr): make anonymizeAuthor resumable on partial failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Qodo review: the `erased: true` sentinel was written before the chat
scrub loop, so a throw during scrub left chat messages untouched while
subsequent calls short-circuited on `existing.erased` and never
finished. Split the write: zero the display identity first (still hides
the name), run the chat scrub, and only then stamp `erased: true` so a
retry resumes the sweep.

Regression test covers the partial-run → retry path.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/node/db/AuthorManager.ts               | 31 ++++++++++++++++------
 src/tests/backend/specs/anonymizeAuthor.ts | 19 +++++++++++++
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/src/node/db/AuthorManager.ts b/src/node/db/AuthorManager.ts
index 4df9d0d83bd..b8495e4fe24 100644
--- a/src/node/db/AuthorManager.ts
+++ b/src/node/db/AuthorManager.ts
@@ -343,9 +343,11 @@ exports.anonymizeAuthor = async (authorID: string): Promise<{
     };
   }
 
-  // Drop the token/mapper mappings first, before zeroing the display
-  // record, so a concurrent getAuthorId() can no longer resolve this
-  // author through its old bindings mid-erasure.
+  // Drop the token/mapper mappings first, before touching anything else, so
+  // a concurrent getAuthorId() can no longer resolve this author through
+  // its old bindings mid-erasure. These operations are independently
+  // idempotent — rerunning a failed call later still produces the same
+  // final state, just with zero counters for anything already done.
   let removedTokenMappings = 0;
   const tokenKeys: string[] = await db.findKeys('token2author:*', null);
   for (const key of tokenKeys) {
@@ -363,18 +365,19 @@ exports.anonymizeAuthor = async (authorID: string): Promise<{
     }
   }
 
-  // Zero the display identity. Keep `padIDs` so future maintenance (or a
-  // pad-delete batch) can still find the set of pads this authorID touched.
+  // Zero the display identity now — without the `erased` sentinel — so a
+  // partial run still hides the name. The sentinel itself is only set at
+  // the end (below) so a failure in chat scrub lets the next call resume.
   await db.set(`globalAuthor:${authorID}`, {
     colorId: 0,
     name: null,
     timestamp: Date.now(),
     padIDs: existing.padIDs || {},
-    erased: true,
-    erasedAt: new Date().toISOString(),
   });
 
-  // Null authorship on chat messages the author posted.
+  // Null authorship on chat messages the author posted. If this throws
+  // partway through, the function re-runs the loop on the next call
+  // because `erased: true` is not set yet.
   const padIDs = Object.keys(existing.padIDs || {});
   let clearedChatMessages = 0;
   for (const padID of padIDs) {
@@ -393,6 +396,18 @@ exports.anonymizeAuthor = async (authorID: string): Promise<{
     }
   }
 
+  // Everything succeeded — stamp the sentinel so subsequent calls
+  // short-circuit. The record is rewritten in full: padIDs is carried over
+  // from `existing`, while timestamp is refreshed rather than preserved.
+  await db.set(`globalAuthor:${authorID}`, {
+    colorId: 0,
+    name: null,
+    timestamp: Date.now(),
+    padIDs: existing.padIDs || {},
+    erased: true,
+    erasedAt: new Date().toISOString(),
+  });
+
   return {
     affectedPads: padIDs.length,
     removedTokenMappings,
diff --git a/src/tests/backend/specs/anonymizeAuthor.ts b/src/tests/backend/specs/anonymizeAuthor.ts
index bdcf9e2fecf..b7c47cf5ab0 100644
--- a/src/tests/backend/specs/anonymizeAuthor.ts
+++ b/src/tests/backend/specs/anonymizeAuthor.ts
@@ -71,4 +71,23 @@ describe(__filename, function () {
       clearedChatMessages: 0,
     });
   });
+
+  it('re-runs the sweep when a prior call errored before setting erased=true',
+      async function () {
+        const mapper = `mapper-${Date.now()}-${Math.random().toString(36).slice(2)}`;
+        const {authorID} = await authorManager.createAuthorIfNotExistsFor(mapper, 'Dan');
+
+        // Simulate a partial run: display identity zeroed, `erased` sentinel
+        // never written; the mapper2author binding is left for the retry.
+        const partial = await DB.db.get(`globalAuthor:${authorID}`);
+        partial.name = null;
+        partial.colorId = 0;
+        await DB.db.set(`globalAuthor:${authorID}`, partial);
+
+        const res = await authorManager.anonymizeAuthor(authorID);
+        assert.equal(res.removedExternalMappings >= 1, true,
+            `retry must still clean mapper2author; got ${res.removedExternalMappings}`);
+        const record = await DB.db.get(`globalAuthor:${authorID}`);
+        assert.equal(record.erased, true);
+      });
 });