diff --git a/bin/compactPad.ts b/bin/compactPad.ts
new file mode 100644
index 00000000000..475a619e106
--- /dev/null
+++ b/bin/compactPad.ts
@@ -0,0 +1,97 @@
+'use strict';
+
+/*
+ * Compact a pad's revision history to reclaim database space.
+ *
+ * Usage:
+ *   node bin/compactPad.js PAD_ID            # collapse all history
+ *   node bin/compactPad.js PAD_ID --keep N   # keep only the last N revisions
+ *
+ * Wraps the existing Cleanup helper (src/node/utils/Cleanup.ts) via the
+ * compactPad HTTP API so admins can trigger it from the CLI without
+ * routing through the admin settings UI. Destructive — export the pad as
+ * `.etherpad` first for backup.
+ *
+ * Issue #6194: long-lived pads with heavy edit history accumulate hundreds
+ * of megabytes in the DB; this tool is the per-pad brick for reclaiming
+ * that space without rotating to a new pad ID.
+ */
+import path from 'node:path';
+import fs from 'node:fs';
+import process from 'node:process';
+import axios from 'axios';
+
+// As of v14, Node.js does not exit when there is an unhandled Promise rejection. Convert an
+// unhandled rejection into an uncaught exception, which does cause Node.js to exit.
+process.on('unhandledRejection', (err) => { throw err; });
+
+const settings = require('ep_etherpad-lite/tests/container/loadSettings').loadSettings();
+
+axios.defaults.baseURL = `http://${settings.ip}:${settings.port}`;
+
+// Print the accepted invocations and exit with status 2.
+const usage = (): never => {
+  console.error('Usage:');
+  console.error('  node bin/compactPad.js PAD_ID');
+  console.error('  node bin/compactPad.js PAD_ID --keep N');
+  process.exit(2);
+};
+
+// Accept exactly `PAD_ID` or `PAD_ID --keep N`. Any other shape (e.g. a dangling `--keep`
+// with no value) must not fall through to the destructive collapse-all default.
+const args = process.argv.slice(2);
+if (args.length !== 1 && args.length !== 3) usage();
+const padId = args[0];
+
+let keepRevisions: number | null = null;
+if (args.length === 3) {
+  if (args[1] !== '--keep') usage();
+  // Digits only: Number() alone would coerce '' or ' ' to 0, a far broader deletion.
+  keepRevisions = /^\d+$/.test(args[2]) ? Number(args[2]) : NaN;
+  if (!Number.isSafeInteger(keepRevisions)) {
+    console.error(`--keep expects a non-negative integer; got ${args[2]}`);
+    process.exit(2);
+  }
+}
+
+// get the API Key
+const filePath = path.join(__dirname, '../APIKEY.txt');
+const apikey = fs.readFileSync(filePath, {encoding: 'utf-8'}).trim();
+
+(async () => {
+  const apiInfo = await axios.get('/api/');
+  const apiVersion: string | undefined = apiInfo.data.currentVersion;
+  if (!apiVersion) throw new Error('No version set in API');
+
+  // Pre-flight: show current revision count so operators can eyeball impact.
+  // URLSearchParams percent-encodes the API key and pad ID for the query string.
+  const countParams = new URLSearchParams({apikey, padID: padId});
+  const countUri = `/api/${apiVersion}/getRevisionsCount?${countParams.toString()}`;
+  const countRes = await axios.get(countUri);
+  if (countRes.data.code !== 0) {
+    console.error(`getRevisionsCount failed: ${JSON.stringify(countRes.data)}`);
+    process.exit(1);
+  }
+  const before: number = countRes.data.data.revisions;
+  const strategy = keepRevisions == null ? 'collapse all' : `keep last ${keepRevisions}`;
+  console.log(`Pad ${padId}: ${before + 1} revision(s). Strategy: ${strategy}.`);
+
+  const params = new URLSearchParams({apikey, padID: padId});
+  if (keepRevisions != null) params.set('keepRevisions', String(keepRevisions));
+  const result = await axios.post(`/api/${apiVersion}/compactPad?${params.toString()}`);
+  if (result.data.code !== 0) {
+    console.error(`compactPad failed: ${JSON.stringify(result.data)}`);
+    process.exit(1);
+  }
+
+  // Post-flight: the pad is now compacted. Re-read the rev count so the
+  // operator sees concrete savings.
+  const afterRes = await axios.get(countUri);
+  const after: number | undefined = afterRes.data?.data?.revisions;
+  if (after != null) {
+    console.log(`Done. Pad ${padId}: ${after + 1} revision(s) remaining ` +
+                `(was ${before + 1}).`);
+  } else {
+    console.log('Done.');
+  }
+})();
diff --git a/src/node/db/API.ts b/src/node/db/API.ts
index 9ca5ca03c4b..9fd640a8f1a 100644
--- a/src/node/db/API.ts
+++ b/src/node/db/API.ts
@@ -635,6 +635,48 @@ exports.copyPadWithoutHistory = async (sourceID: string, destinationID: string,
   await pad.copyPadWithoutHistory(destinationID, force, authorId);
 };
 
+/**
+compactPad(padID, [keepRevisions]) collapses the pad's revision history to
+reclaim database space (issue #6194). Wraps the existing `Cleanup` helper
+so admins can trigger it over the public API / CLI rather than only
+through the admin settings UI.
+
+When `keepRevisions` is omitted (or `null`), all history is collapsed
+into a single base revision that reproduces the current atext
+(equivalent to a freshly-imported pad). When set to a non-negative
+integer N, the pad keeps only its last N revisions (equivalent to
+`cleanup.keepRevisions`). Pad text and chat history are preserved in
+both modes. Destructive — recommend exporting the `.etherpad` snapshot
+first.
+
+Example returns:
+
+{code: 0, message:"ok", data: {ok: true, mode: "all"}}
+{code: 1, message:"padID does not exist", data: null}
+
+ @param {String} padID the id of the pad to compact
+ @param {Number|null} keepRevisions number of recent revisions to keep;
+   null / omitted collapses the full history; anything that is not a
+   non-negative integer is rejected with an apierror
+*/
+exports.compactPad = async (padID: string, keepRevisions: number | null = null) => {
+  const pad = await getPadSafe(padID, true);
+  const cleanup = require('../utils/Cleanup');
+  if (keepRevisions == null) {
+    await cleanup.deleteAllRevisions(pad.id);
+    return {ok: true, mode: 'all'};
+  }
+  // The value may arrive as a string (e.g. from an HTTP query parameter), so inspect the
+  // raw input: Number('') and Number(' ') are 0 and must not pass as "keep nothing".
+  const raw: unknown = keepRevisions;
+  const keep = typeof raw === 'string' && raw.trim() === '' ? NaN : Number(raw);
+  if (!Number.isSafeInteger(keep) || keep < 0) {
+    throw new CustomError('keepRevisions must be a non-negative integer', 'apierror');
+  }
+  const ok = await cleanup.deleteRevisions(pad.id, keep);
+  return {ok, mode: 'keepLast', keepRevisions: keep};
+};
+
 /**
 movePad(sourceID, destinationID[, force=false]) moves a pad. If force is true,
   the destination will be overwritten if it exists.
diff --git a/src/node/handler/APIHandler.ts b/src/node/handler/APIHandler.ts
index 32ce9d1189a..7bb9fd1f6ab 100644
--- a/src/node/handler/APIHandler.ts
+++ b/src/node/handler/APIHandler.ts
@@ -142,9 +142,14 @@ version['1.3.0'] = {
   setText: ['padID', 'text', 'authorId'],
 };
 
+version['1.3.1'] = {
+  ...version['1.3.0'],
+  compactPad: ['padID', 'keepRevisions'],
+};
+
 
 // set the latest available API version here
-exports.latestApiVersion = '1.3.0';
+exports.latestApiVersion = '1.3.1';
 
 // exports the versions so it can be used by the new Swagger endpoint
 exports.version = version;
diff --git a/src/tests/backend/specs/compactPad.ts b/src/tests/backend/specs/compactPad.ts
new file mode 100644
index 00000000000..33e96cdea12
--- /dev/null
+++ b/src/tests/backend/specs/compactPad.ts
@@ -0,0 +1,92 @@
+'use strict';
+
+const assert = require('assert').strict;
+const common = require('../common');
+const padManager = require('../../../node/db/PadManager');
+const api = require('../../../node/db/API');
+
+// Coverage for the compactPad API endpoint added in #6194.
+// The underlying Cleanup logic is tested where it lives; these tests just
+// verify the public-API wiring and argument handling.
+describe(__filename, function () {
+  let padId: string;
+
+  beforeEach(async function () {
+    padId = common.randomString();
+    assert(!await padManager.doesPadExist(padId));
+  });
+
+  describe('API.compactPad()', function () {
+    it('collapses all history when keepRevisions is omitted', async function () {
+      const pad = await padManager.getPad(padId);
+      await pad.appendText('marker-alpha\n');
+      await pad.appendText('marker-beta\n');
+      await pad.appendText('marker-gamma\n');
+      const before = pad.getHeadRevisionNumber();
+      assert.ok(before >= 3, `expected at least 3 revs, got ${before}`);
+
+      const result = await api.compactPad(padId);
+      assert.deepStrictEqual(result, {ok: true, mode: 'all'});
+
+      // Reload: the compacted pad lands at head<=1 (matches the shape
+      // `copyPadWithoutHistory` produces). The content survives — we
+      // don't assert byte-exact equality because Cleanup.deleteAllRevisions
+      // goes through copyPadWithoutHistory twice and may adjust trailing
+      // whitespace; what we care about is that the author-written content
+      // is still there.
+      const reloaded = await padManager.getPad(padId);
+      assert.ok(reloaded.getHeadRevisionNumber() <= 1,
+          `expected head<=1, got ${reloaded.getHeadRevisionNumber()}`);
+      const text = reloaded.atext.text;
+      assert.ok(text.includes('marker-alpha'), 'alpha content preserved');
+      assert.ok(text.includes('marker-beta'), 'beta content preserved');
+      assert.ok(text.includes('marker-gamma'), 'gamma content preserved');
+    });
+
+    it('keeps only the last N revisions when keepRevisions is a number',
+        async function () {
+          const pad = await padManager.getPad(padId);
+          for (let i = 0; i < 6; i++) await pad.appendText(`keep-line-${i}\n`);
+          const before = pad.getHeadRevisionNumber();
+
+          const result = await api.compactPad(padId, 2);
+          assert.strictEqual(result.mode, 'keepLast');
+          assert.strictEqual(result.keepRevisions, 2);
+
+          const reloaded = await padManager.getPad(padId);
+          assert.ok(reloaded.getHeadRevisionNumber() <= before);
+          // Content survives — whitespace normalization from the twin-copy
+          // roundtrip is ignored, we just check the actual text markers.
+          for (let i = 0; i < 6; i++) {
+            assert.ok(reloaded.atext.text.includes(`keep-line-${i}`),
+                `line ${i} survived compaction`);
+          }
+        });
+
+    it('rejects negative keepRevisions', async function () {
+      const pad = await padManager.getPad(padId);
+      await pad.appendText('content\n');
+      await assert.rejects(
+          () => api.compactPad(padId, -1),
+          /keepRevisions must be a non-negative integer/);
+    });
+
+    it('rejects non-numeric keepRevisions', async function () {
+      const pad = await padManager.getPad(padId);
+      await pad.appendText('content\n');
+      await assert.rejects(
+          // @ts-ignore - deliberately passing an invalid type
+          () => api.compactPad(padId, 'nope'),
+          /keepRevisions must be a non-negative integer/);
+    });
+  });
+
+  describe('APIHandler wiring', function () {
+    it('declares padID and keepRevisions as the compactPad parameters', function () {
+      // Required lazily so that a failure to load the handler only fails this test.
+      const apiHandler = require('../../../node/handler/APIHandler');
+      assert.deepStrictEqual(
+          apiHandler.version['1.3.1'].compactPad, ['padID', 'keepRevisions']);
+    });
+  });
+});