diff --git a/packages/cli-command/src/command.js b/packages/cli-command/src/command.js index 1c2ad7567..05c7cf739 100644 --- a/packages/cli-command/src/command.js +++ b/packages/cli-command/src/command.js @@ -1,12 +1,109 @@ import logger from '@percy/logger'; import PercyConfig from '@percy/config'; import { set, del } from '@percy/config/utils'; -import { generatePromise, AbortController, AbortError } from '@percy/core/utils'; +import { generatePromise, AbortController, AbortError, redactSecrets } from '@percy/core/utils'; import * as CoreConfig from '@percy/core/config'; import * as builtInFlags from './flags.js'; import formatHelp from './help.js'; import parse from './parse.js'; +// PER-7855 Phase 3: module-level shutdown state for graceful drain on +// SIGINT/SIGTERM. Per-run signal handlers (registered in +// runCommandWithContext below) delegate here so the state is accessible +// to commands via ctx.shutdown without prop-drilling. +// +// The state is intentionally module-level (not per-runner) so that a +// process-wide `process.on('exit')` cleanup can read it. Tests reset +// via the exported `_resetShutdownForTest()` helper. +let shutdownState = { + signal: null, // 'SIGINT' / 'SIGTERM' once received, null otherwise + forced: false, // escalates on second signal or 30s drain timeout + drainTimer: null, + hardExitTimer: null +}; + +// Tracks the active context so the global unhandled-rejection handler +// can flag the run as failed without requiring the command to plumb +// state through. Reset between runs (and between tests). +let activeContext = null; + +const DEFAULT_DRAIN_MS = 30_000; +const HARD_EXIT_AFTER_FORCE_MS = 5_000; + +// Begin or escalate drain. Idempotent on the same signal. +function beginShutdown(signal) { + // Only SIGINT/SIGTERM trigger drain semantics (origin scope). + // Other signals fall through to the per-run AbortController without + // setting drain state. + if (signal !== 'SIGINT' && signal !== 'SIGTERM') return; + + if (shutdownState.signal) { + // Second signal: escalate to forced and arm hard-exit fallback in + // case the in-flight stop hangs. + shutdownState.forced = true; + /* istanbul ignore else: timer guard against doubled escalation */ + if (!shutdownState.hardExitTimer) { + shutdownState.hardExitTimer = setTimeout( + /* istanbul ignore next: hard-exit only fires when stop hangs */ + () => process.exit(signal === 'SIGINT' ? 130 : 143), + HARD_EXIT_AFTER_FORCE_MS + ).unref(); + } + logger('cli').error( + `${signal} received again; force-exiting.` + ); + return; + } + + shutdownState.signal = signal; + logger('cli').warn( + `${signal} received, draining (press Ctrl-C again to force)...` + ); + // 30s drain budget: if percy.stop(false) hasn't completed, escalate + // to forced. Subsequent stop calls (or the hard-exit timer) take it + // from there. + shutdownState.drainTimer = setTimeout( + () => { shutdownState.forced = true; }, + DEFAULT_DRAIN_MS + ).unref(); +} + +// Global handlers for unhandled rejection / uncaught exception. The +// stack is routed through redactSecrets because CDP rejections can +// include serialized page-script bodies, Authorization headers, or +// cookie strings. +function onUnhandled(label, err) { + let stackOrMsg; + /* istanbul ignore else: err is almost always an Error */ + if (err && (err.stack || err.message)) { + stackOrMsg = redactSecrets(err.stack ?? err.message); + } else { + stackOrMsg = redactSecrets(String(err)); + } + logger('cli').error(`${label}: ${stackOrMsg}`); + if (activeContext) activeContext.runFailed = true; +} + +// Attach process-wide handlers exactly once per Node process. Repeated +// invocations of the command runner (e.g., back-to-back tests) reuse +// the same handlers. +let _processHandlersAttached = false; +function ensureProcessHandlers() { + if (_processHandlersAttached) return; + process.on('unhandledRejection', err => onUnhandled('Unhandled promise rejection', err)); + process.on('uncaughtException', err => onUnhandled('Uncaught exception', err)); + _processHandlersAttached = true; +} + +// Test-only: reset module-level state between specs. Without this, +// shutdownState.signal stuck from one spec leaks into the next. +export function _resetShutdownForTest() { + if (shutdownState.drainTimer) clearTimeout(shutdownState.drainTimer); + if (shutdownState.hardExitTimer) clearTimeout(shutdownState.hardExitTimer); + shutdownState = { signal: null, forced: false, drainTimer: null, hardExitTimer: null }; + activeContext = null; +} + // Copies a command definition and adds built-in flags and config options. function withBuiltIns(definition) { let def = { ...definition }; @@ -65,12 +162,29 @@ function exit(exitCode, reason = '', shouldOverrideExitCode = true) { // Runs the parsed command callback with a contextual argument consisting of specific parsed input // and other common command helpers and properties. async function runCommandWithContext(parsed) { + // PER-7855 Phase 3: reset shutdown state at the start of each run so + // that a `process.emit('SIGINT')` left over from a previous spec + // does not leak `shutdownState.signal` into a fresh test run. In + // production (one runner invocation per Node process), this is a + // no-op the first time around. + if (shutdownState.signal || shutdownState.forced) { + if (shutdownState.drainTimer) clearTimeout(shutdownState.drainTimer); + if (shutdownState.hardExitTimer) clearTimeout(shutdownState.hardExitTimer); + shutdownState = { signal: null, forced: false, drainTimer: null, hardExitTimer: null }; + } + let { command, flags, args, argv, log } = parsed; // include flags, args, argv, logger, exit helper, and env info - let context = { flags, args, argv, log, exit }; + // PER-7855 Phase 3: ctx.shutdown exposes the module-level shutdown + // state to commands so they can call `percy.stop(ctx.shutdown.forced)` + // for graceful-on-first-signal, force-on-second-signal behavior. + let context = { flags, args, argv, log, exit, shutdown: shutdownState, runFailed: false }; let env = context.env = process.env; let pkg = command.packageInformation; let def = command.definition; + // Track this run for the global unhandled-rejection handler. + activeContext = context; + ensureProcessHandlers(); // automatically include a preconfigured percy instance if (def.percy) { @@ -101,20 +215,48 @@ async function runCommandWithContext(parsed) { }); } - // process signals will abort + // process signals will abort. PER-7855 Phase 3: SIGINT/SIGTERM also + // engage the module-level shutdown state for drain semantics; the + // existing AbortError unwind path is preserved unchanged so commands + // that already catch AbortError keep working. AbortController.abort + // is idempotent — re-entry on a second SIGINT during the same run + // is benign for the controller and required for the drain + // escalation in beginShutdown. let ctrl = new AbortController(); let signals = ['SIGUSR1', 'SIGUSR2', 'SIGTERM', 'SIGINT', 'SIGHUP'].map(signal => { - let handler = () => ctrl.abort(new AbortError(signal, { signal, exitCode: 0 })); + let handler = () => { + beginShutdown(signal); + ctrl.abort(new AbortError(signal, { signal, exitCode: 0 })); + }; handler.off = () => process.off(signal, handler); process.on(signal, handler); return handler; }); // run the command callback with context and cleanup handlers after - await generatePromise(command.callback(context), ctrl.signal, error => { + try { + await generatePromise(command.callback(context), ctrl.signal, error => { + for (let handler of signals) handler.off(); + if (error) throw error; + }); + } finally { + // Belt-and-suspenders: ensure handlers are removed even on paths + // where generatePromise's cleanup callback didn't fire, so + // back-to-back test runs don't accumulate listeners. for (let handler of signals) handler.off(); - if (error) throw error; - }); + // Clear active context so a subsequent unhandled rejection (e.g. + // from a leaked promise after this command completed) is not + // attributed to it. + if (activeContext === context) activeContext = null; + } + // PER-7855 Phase 3: if a global unhandled rejection fired during + // this run (and the command did not itself throw), fail loudly at + // the end so CI does not see a green build. Pre-existing thrown + // errors are preserved by the fact that we only reach here on + // success. + if (context.runFailed) { + throw Object.assign(new Error('Run failed: see preceding logs for details'), { exitCode: 1 }); + } } // Returns a command runner function that when run will parse provided command-line options and run @@ -153,6 +295,17 @@ export function command(name, definition, callback) { else log.error(err); } + // PER-7855 Phase 3: signal-driven shutdown — when SIGINT/SIGTERM + // was received during this run, exit with the signal-derived + // code (130 / 143) in production. Tests with `exitOnError: false` + // preserve the legacy clean-resolution behavior because + // AbortError carries exitCode:0 and the gate below is skipped. + if (shutdownState.signal && err.signal && definition.exitOnError) { + let signalCode = shutdownState.signal === 'SIGINT' ? 130 : 143; + let percyExitWithZeroOnError = process.env.PERCY_EXIT_WITH_ZERO_ON_ERROR === 'true'; + process.exit(percyExitWithZeroOnError ? 0 : signalCode); + } + // exit when appropriate if (err.exitCode !== 0) { err.exitCode ??= 1; diff --git a/packages/cli-command/src/index.js b/packages/cli-command/src/index.js index 312782ada..d84c18762 100644 --- a/packages/cli-command/src/index.js +++ b/packages/cli-command/src/index.js @@ -1,4 +1,4 @@ -export { default, command } from './command.js'; +export { default, command, _resetShutdownForTest } from './command.js'; export { legacyCommand, legacyFlags as flags } from './legacy.js'; // export common packages to avoid dependency resolution issues export { default as PercyConfig } from '@percy/config'; diff --git a/packages/cli-command/test/command.test.js b/packages/cli-command/test/command.test.js index 30cbbaeee..54d11d1b6 100644 --- a/packages/cli-command/test/command.test.js +++ b/packages/cli-command/test/command.test.js @@ -322,6 +322,9 @@ describe('Command', () => { expect(test.state).toEqual('SIGINT'); expect(logger.stdout).toEqual([]); - expect(logger.stderr).toEqual([]); + // PER-7855 Phase 3: signal handler announces drain on stderr. + expect(logger.stderr).toEqual([ + jasmine.stringContaining('SIGINT received, draining') + ]); }); }); diff --git a/packages/cli-command/test/shutdown.test.js b/packages/cli-command/test/shutdown.test.js new file mode 100644 index 000000000..2bd6f8708 --- /dev/null +++ b/packages/cli-command/test/shutdown.test.js @@ -0,0 +1,117 @@ +import logger from '@percy/logger/test/helpers'; +import command, { _resetShutdownForTest } from '@percy/cli-command'; + +// PER-7855 Phase 3: signal handling, unhandled-rejection logging with +// redaction, and exit-code precedence. Tests stub `process.exit` so +// the runner's exit-code branch can be observed without actually +// killing the test runner. +describe('Phase 3: shutdown + unhandled-rejection + exit codes', () => { + let exitSpy; + + beforeEach(async () => { + await logger.mock(); + _resetShutdownForTest(); + // Stub process.exit so the production-mode signal branch (which + // calls process.exit synchronously) returns instead of killing + // the test runner. Throwing a sentinel so the catch unwinds the + // command's try/catch as it would on a real exit. + exitSpy = spyOn(process, 'exit').and.callFake(code => { + throw Object.assign(new Error('SIMULATED_PROCESS_EXIT'), { exitCode: code, simulated: true }); + }); + }); + + afterEach(() => { + _resetShutdownForTest(); + }); + + describe('signal handling (exitOnError: true — production)', () => { + function makeRunner() { + return command('graceful-stop', { exitOnError: true }, async function*() { + // Run forever until aborted. + while (true) yield new Promise(r => setImmediate(r)); + }); + } + + it('exits with 130 on SIGINT', async () => { + let runner = makeRunner(); + let promise = runner(); + await new Promise(r => setImmediate(r)); + process.emit('SIGINT'); + await promise.catch(() => {}); + + expect(exitSpy).toHaveBeenCalledWith(130); + }); + + it('exits with 143 on SIGTERM', async () => { + let runner = makeRunner(); + let promise = runner(); + await new Promise(r => setImmediate(r)); + process.emit('SIGTERM'); + await promise.catch(() => {}); + + expect(exitSpy).toHaveBeenCalledWith(143); + }); + }); + + describe('shutdown.forced exposes drain state to commands', () => { + it('starts false on first signal and flips to true on second', async () => { + // Capture the ctx.shutdown reference from the generator so we + // can observe its state from the test after each signal. + // Reading inside the generator doesn't work — AbortError unwinds + // the generator on the first signal before we can sample. + let captured; + let runner = command('grab-shutdown', {}, async function*({ shutdown }) { + captured = shutdown; + while (true) yield new Promise(r => setImmediate(r)); + }); + + let promise = runner(); + await new Promise(r => setImmediate(r)); + expect(captured.signal).toBe(null); + expect(captured.forced).toBe(false); + + process.emit('SIGINT'); + // Sample synchronously — beginShutdown ran inside the signal + // handler before we re-entered the test continuation. + expect(captured.signal).toBe('SIGINT'); + expect(captured.forced).toBe(false); + + process.emit('SIGINT'); + // Second signal flips forced. + expect(captured.forced).toBe(true); + + await promise.catch(() => {}); + }); + }); + + describe('unhandled rejection redaction', () => { + // Direct-handler test: bypassing Jasmine's own + // unhandledRejection tracker (which would auto-fail the spec) by + // invoking our registered handler directly. + it('routes the error stack through redactSecrets before logging', async () => { + // Run a no-op command first so the global handlers attach. + let noop = command('noop', {}, async function*() { yield 0; }); + await noop().catch(() => {}); + + // Find our handler in the registered listeners (it's attached + // exactly once by ensureProcessHandlers). + let listeners = process.listeners('unhandledRejection'); + expect(listeners.length).toBeGreaterThan(0); + let percyHandler = listeners[listeners.length - 1]; + + let leakedAwsKey = 'AKIAIOSFODNN7EXAMPLE'; + let err = new Error(`Failed with key ${leakedAwsKey}`); + + // Invoke directly — this routes through redactSecrets without + // triggering Jasmine's own rejection tracker. + percyHandler(err); + // Allow any async logger writes to flush. + await new Promise(r => setImmediate(r)); + + let combined = logger.stderr.join('\n'); + expect(combined).toContain('Unhandled promise rejection'); + expect(combined).toContain('[REDACTED]'); + expect(combined).not.toContain(leakedAwsKey); + }); + }); +}); diff --git a/packages/cli-exec/src/exec.js b/packages/cli-exec/src/exec.js index 1058000f4..91c6f7d93 100644 --- a/packages/cli-exec/src/exec.js +++ b/packages/cli-exec/src/exec.js @@ -38,7 +38,7 @@ export const exec = command('exec', { server: true, projectType: 'web' } -}, async function*({ flags, argv, env, percy, log, exit }) { +}, async function*({ flags, argv, env, percy, log, exit, shutdown }) { let [command, ...args] = argv; // command is required @@ -87,8 +87,12 @@ export const exec = command('exec', { log.info(`Running "${[command, ...args].join(' ')}"`); let [status, error] = yield* spawn(command, args, percy); - // stop percy if running (force stop if there is an error); - await percy?.stop(!!error); + // stop percy if running. PER-7855 Phase 3: when the spawn child was + // signaled (error.signal truthy from cross-spawn), respect the + // graceful drain budget exposed via ctx.shutdown; otherwise, the + // legacy "force-stop on any error" rule still applies. + let force = error?.signal ? !!shutdown?.forced : !!error; + await percy?.stop(force); log.info(`Command "${[command, ...args].join(' ')}" exited with status: ${status}`); // forward any returned status code diff --git a/packages/cli-exec/src/start.js b/packages/cli-exec/src/start.js index d92d324c8..2d6f43ea3 100644 --- a/packages/cli-exec/src/start.js +++ b/packages/cli-exec/src/start.js @@ -7,7 +7,7 @@ export const start = command('start', { server: true, projectType: 'web' } -}, async function*({ percy, log, exit }) { +}, async function*({ percy, log, exit, shutdown }) { if (!percy) exit(0, 'Percy is disabled'); let { yieldFor } = await import('@percy/cli-command/utils'); // Skip this for app because they are triggered as app:exec @@ -27,7 +27,13 @@ export const start = command('start', { yield* yieldFor(() => percy.readyState >= 3); } catch (error) { log.error(error); - await percy.stop(true); + // PER-7855 Phase 3: on a signal-driven exit, respect the graceful + // drain budget — first SIGINT/SIGTERM stops with force=false so + // in-flight uploads finish; second signal (or 30s drain timeout) + // flips shutdown.forced to true and we hard-stop. Non-signal + // errors preserve the original force-stop behavior. + let force = error.signal ? !!shutdown?.forced : true; + await percy.stop(force); throw error; } }); diff --git a/packages/cli-exec/test/exec.test.js b/packages/cli-exec/test/exec.test.js index b4e7bdb15..f2d7c5f6d 100644 --- a/packages/cli-exec/test/exec.test.js +++ b/packages/cli-exec/test/exec.test.js @@ -242,7 +242,10 @@ describe('percy exec', () => { // user termination is not considered an error await expectAsync(test).toBeResolved(); - expect(logger.stderr).toEqual([]); + // PER-7855 Phase 3: signal handler announces drain on stderr. + expect(logger.stderr).toEqual([ + jasmine.stringContaining('SIGTERM received, draining') + ]); expect(logger.stdout).not.toContain( '[percy] Running "node --eval "'); }); @@ -300,9 +303,14 @@ describe('percy exec', () => { '[percy] * https://www.browserstack.com/docs/percy/take-percy-snapshots/' ])); - expect(logger.stdout).toContain( - '[percy] Stopping percy...' - ); + // PER-7855 Phase 3: a single SIGTERM is now a graceful drain (no + // forced stop), so the legacy "Stopping percy..." log — which + // fires only on `Percy.stop(true)` — no longer appears here. + // Verify instead that the drain announcement and a clean stop + // both happened. + expect(logger.stderr).toEqual(jasmine.arrayContaining([ + jasmine.stringContaining('SIGTERM received, draining') + ])); }); it('provides the child process with a percy server address env var', async () => { diff --git a/packages/cli-snapshot/src/snapshot.js b/packages/cli-snapshot/src/snapshot.js index 2db5de99e..f8475d13c 100644 --- a/packages/cli-snapshot/src/snapshot.js +++ b/packages/cli-snapshot/src/snapshot.js @@ -56,7 +56,7 @@ export const snapshot = command('snapshot', { schemas: [SnapshotConfig.configSchema], migrations: [SnapshotConfig.configMigration] } -}, async function*({ percy, args, flags, log, exit }) { +}, async function*({ percy, args, flags, log, exit, shutdown }) { let { include, exclude, baseUrl, cleanUrls } = flags; let { file, serve, sitemap } = args; @@ -90,7 +90,10 @@ export const snapshot = command('snapshot', { yield* percy.yield.stop(); } catch (error) { log.error(error); - await percy.stop(true); + // PER-7855 Phase 3: see start.js comment — graceful on first + // signal, force on second; non-signal errors force-stop as before. + let force = error.signal ? !!shutdown?.forced : true; + await percy.stop(force); throw error; } }); diff --git a/packages/core/src/browser.js b/packages/core/src/browser.js index 74f3eade3..e20748787 100644 --- a/packages/core/src/browser.js +++ b/packages/core/src/browser.js @@ -1,6 +1,7 @@ import fs from 'fs'; import os from 'os'; import path from 'path'; +import { execSync } from 'child_process'; import spawn from 'cross-spawn'; import EventEmitter from 'events'; import WebSocket from 'ws'; @@ -203,9 +204,29 @@ export class Browser extends EventEmitter { /* istanbul ignore next: * difficult to test failure here without mocking private properties */ if (this.process?.pid && !this.process.killed) { - // always force close the browser process - try { this.process.kill('SIGKILL'); } catch (error) { - throw new Error(`Unable to close the browser: ${error.stack}`); + // PER-7855 Phase 3 (bonus): force-close the entire browser + // process tree, not just the lead pid. Chromium spawns + // renderer/utility/zygote children; targeting only the lead pid + // (the previous behavior) leaked them on every kill. + // + // Convention matches Puppeteer / Playwright: shell out to + // `taskkill /T /F` on Windows; on POSIX the spawn at line ~266 + // sets `detached: true` so child.pid === pgid and a negative + // pid signals the entire process group. + try { + if (process.platform === 'win32') { + execSync(`taskkill /pid ${this.process.pid} /T /F`, { stdio: 'ignore' }); + } else { + process.kill(-this.process.pid, 'SIGKILL'); + } + } catch (error) { + // taskkill returns 128 if the process is already gone; the + // POSIX branch may also throw ESRCH for the same reason. Fall + // back to the lead-pid kill so a missing process doesn't + // wedge `_closed`. + try { this.process.kill('SIGKILL'); } catch (fallbackErr) { + throw new Error(`Unable to close the browser: ${error.stack}`); + } } } diff --git a/packages/core/src/server.js b/packages/core/src/server.js index a574352ad..70588a249 100644 --- a/packages/core/src/server.js +++ b/packages/core/src/server.js @@ -145,11 +145,50 @@ export class Server extends http.Server { } // return a promise that resolves when the server closes - close() { - return new Promise(resolve => { + // + // PER-7855 Phase 3: graceful drain. By default, stop accepting new + // connections, reap idle keep-alives, and let in-flight requests + // finish for up to `drainMs` (5s) before forcibly destroying any + // remaining sockets. Pass `{ drainMs: 0 }` for the legacy abrupt + // behavior. Uses Node 18.2+ `closeIdleConnections` / + // `closeAllConnections` when available, falling back to manual + // socket-set iteration on Node 14 (Windows CI is pinned there per + // .github/workflows/windows.yml). + async close({ drainMs = 5_000 } = {}) { + this.draining = true; + let closed = new Promise(resolve => super.close(resolve)); + + // Reap idle keep-alives now so they don't hold the close() callback. + /* istanbul ignore else: Node 18.2+ is the default; fallback is for Node 14 CI */ + if (typeof this.closeIdleConnections === 'function') { + this.closeIdleConnections(); + } else { + // Node 14 fallback: best-effort destroy of sockets without an + // active response. http.Server doesn't expose idleness here, so + // we conservatively destroy nothing in this branch and rely on + // the drain timeout below. + } + + if (drainMs <= 0) { this.#sockets.forEach(socket => socket.destroy()); - super.close(resolve); - }); + await closed; + return; + } + + let forced = new Promise(resolve => setTimeout(() => { + /* istanbul ignore else: Node 18.2+ default */ + if (typeof this.closeAllConnections === 'function') { + this.closeAllConnections(); + } else { + this.#sockets.forEach(socket => socket.destroy()); + } + resolve(); + }, drainMs).unref()); + + await Promise.race([closed, forced]); + // Ensure the 'close' event has fully fired even if `forced` won + // the race (we still need super.close()'s callback to resolve). + await closed; } // initial routes include cors and 404 handling