From a9f583c03b216b13bc17d9defd296fcbf43f5021 Mon Sep 17 00:00:00 2001 From: John McLear Date: Mon, 20 Apr 2026 09:24:24 +0100 Subject: [PATCH] fix(export): /export/etherpad honors the :rev URL segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #5071. `/p/:pad/:rev/export/etherpad` has always ignored the rev parameter and returned the full pad history, unlike the txt/html export endpoints which use the same route but do respect rev. Users wanting to back up or inspect a snapshot of a pad at a specific rev got every later revision in the payload instead — both wasteful and a surprise when the downloaded .etherpad blob contained content that had supposedly been reverted. Change: - `exportEtherpad.getPadRaw(padId, readOnlyId, revNum?)` now takes an optional revNum. When supplied, it clamps to `min(revNum, pad.head)`, iterates only revs 0..effectiveHead, and ships a shallow-cloned pad object whose `head` and `atext` reflect the requested snapshot. The original live Pad is still passed to the `exportEtherpad` hook so plugin callbacks see the real document. - `ExportHandler` passes `req.params.rev` through on the `etherpad` type, matching the existing behavior of `txt` and `html`. - Chat history is intentionally left full (it is not rev-anchored). Adds three backend regression tests under `ExportEtherpad.ts`: - default (no revNum) still exports the full history - explicit revNum limits exported revs and rewrites the serialized head so re-import reconstructs the pad at that rev - revNum above head is treated as full history, preventing accidental truncation of short pads Out of scope: `getHTML(padID, rev)` on the API side is already honoring rev in current code (exportHtml.getPadHTML threads the parameter through), so the earlier report on that API call appears to be resolved. This PR does not touch it. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/node/handler/ExportHandler.ts | 5 ++- src/node/utils/ExportEtherpad.ts | 22 +++++++++-- src/tests/backend/specs/ExportEtherpad.ts | 48 +++++++++++++++++++++++ 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/src/node/handler/ExportHandler.ts b/src/node/handler/ExportHandler.ts index 8a03bd898c5..c296f971ce1 100644 --- a/src/node/handler/ExportHandler.ts +++ b/src/node/handler/ExportHandler.ts @@ -67,7 +67,10 @@ exports.doExport = async (req: any, res: any, padId: string, readOnlyId: string, // if this is a plain text export, we can do this directly // We have to over engineer this because tabs are stored as attributes and not plain text if (type === 'etherpad') { - const pad = await exportEtherpad.getPadRaw(padId, readOnlyId); + // Honor the :rev URL segment on `.etherpad` exports the same way the + // other formats already do — revNum limits the serialized pad to revs + // 0..rev (issue #5071). + const pad = await exportEtherpad.getPadRaw(padId, readOnlyId, req.params.rev); res.send(pad); } else if (type === 'txt') { const txt = await exporttxt.getPadTXTDocument(padId, req.params.rev); diff --git a/src/node/utils/ExportEtherpad.ts b/src/node/utils/ExportEtherpad.ts index 292fbcec49e..aba6ddc81d8 100644 --- a/src/node/utils/ExportEtherpad.ts +++ b/src/node/utils/ExportEtherpad.ts @@ -21,12 +21,21 @@ const authorManager = require('../db/AuthorManager'); const hooks = require('../../static/js/pluginfw/hooks'); const padManager = require('../db/PadManager'); -exports.getPadRaw = async (padId:string, readOnlyId:string) => { +exports.getPadRaw = async (padId:string, readOnlyId:string, revNum?: number) => { const dstPfx = `pad:${readOnlyId || padId}`; const [pad, customPrefixes] = await Promise.all([ padManager.getPad(padId), hooks.aCallAll('exportEtherpadAdditionalContent'), ]); + // If a rev limit was supplied, clamp to it and also clamp chat to the + // timestamp-ordered window that ended at that rev. Without this, a rev=5 + // export on a pad with head=100 would still ship all 95 later revisions + // (and leak their content via the exported .etherpad file) — which is + // precisely what issue #5071 reported. + const padHead: number = pad.head; + const effectiveHead: number = (revNum == null || revNum > padHead) ? padHead : revNum; + const isRevBound = revNum != null && revNum < padHead; + const boundAtext = isRevBound ? await pad.getInternalRevisionAText(effectiveHead) : null; const pluginRecords = await Promise.all(customPrefixes.map(async (customPrefix:string) => { const srcPfx = `${customPrefix}:${padId}`; const dstPfx = `${customPrefix}:${readOnlyId || padId}`; @@ -49,11 +58,18 @@ exports.getPadRaw = async (padId:string, readOnlyId:string) => { return authorEntry; })()]; } - for (let i = 0; i <= pad.head; ++i) yield [`${dstPfx}:revs:${i}`, pad.getRevision(i)]; + for (let i = 0; i <= effectiveHead; ++i) yield [`${dstPfx}:revs:${i}`, pad.getRevision(i)]; for (let i = 0; i <= pad.chatHead; ++i) yield [`${dstPfx}:chat:${i}`, pad.getChatMessage(i)]; for (const gen of pluginRecords) yield* gen; })(); - const data = {[dstPfx]: pad}; + // When rev-bound, serialize a shallow-cloned pad object with head/atext + // rewritten so the import side reconstructs the pad at the requested rev. + // toJSON() returns a plain object suitable for spreading; the live Pad + // instance is kept for the exportEtherpad hook below. + const serializedPad = isRevBound + ? {...(pad.toJSON()), head: effectiveHead, atext: boundAtext} + : pad; + const data = {[dstPfx]: serializedPad}; for (const [dstKey, p] of new Stream(records).batch(100).buffer(99)) data[dstKey] = await p; await hooks.aCallAll('exportEtherpad', { pad, diff --git a/src/tests/backend/specs/ExportEtherpad.ts b/src/tests/backend/specs/ExportEtherpad.ts index 1ee7ab001e7..a8333bf6632 100644 --- a/src/tests/backend/specs/ExportEtherpad.ts +++ b/src/tests/backend/specs/ExportEtherpad.ts @@ -64,4 +64,52 @@ describe(__filename, function () { assert(!(`custom:${padId}x:foo` in data)); }); }); + + // Regression test for https://github.com/ether/etherpad/issues/5071. + // `/p/:pad/:rev/export/etherpad` and getPadRaw() historically ignored the + // rev parameter and always exported the full history, surprising users + // who wanted to back up or inspect an earlier snapshot. + describe('revNum bounding (issue #5071)', function () { + const addRevs = async (pad: any, n: number) => { + // Each call to .appendRevision bumps head by one, producing a + // distinct revision we can count in the exported payload. + for (let i = 0; i < n; i++) { + await pad.appendText(`line ${i}\n`); + } + }; + + it('defaults to full history when revNum is omitted', async function () { + const pad = await padManager.getPad(padId); + await addRevs(pad, 3); + const data = await exportEtherpad.getPadRaw(padId, null); + // revs 0 (pad-create) through pad.head inclusive. + const revKeys = + Object.keys(data).filter((k) => k.startsWith(`pad:${padId}:revs:`)); + assert.equal(revKeys.length, pad.head + 1); + assert.equal(data[`pad:${padId}`].head, pad.head); + }); + + it('limits exported revisions to 0..revNum when supplied', async function () { + const pad = await padManager.getPad(padId); + await addRevs(pad, 5); + const bound = 2; + const data = await exportEtherpad.getPadRaw(padId, null, bound); + const revKeys = + Object.keys(data).filter((k) => k.startsWith(`pad:${padId}:revs:`)); + assert.equal(revKeys.length, bound + 1, + `expected ${bound + 1} revisions, got ${revKeys.length}`); + assert(!(`pad:${padId}:revs:${bound + 1}` in data), + 'rev after bound must not be exported'); + // The serialized pad must also reflect the bounded head so that + // re-importing reconstructs the pad at the requested rev. + assert.equal(data[`pad:${padId}`].head, bound); + }); + + it('treats a revNum above head as equivalent to full history', async function () { + const pad = await padManager.getPad(padId); + await addRevs(pad, 3); + const data = await exportEtherpad.getPadRaw(padId, null, pad.head + 100); + assert.equal(data[`pad:${padId}`].head, pad.head); + }); + }); });