From e135e9a4634d0e05df82a742c743f98200805df7 Mon Sep 17 00:00:00 2001 From: Shivanshu07 Date: Mon, 27 Apr 2026 19:18:05 +0530 Subject: [PATCH] feat(core): structured abort errors, per-instance network timeout, log redaction (PER-7855) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of PER-7855 CLI QoS hardening — network refactors plus small wins: R4 — Move `Network.TIMEOUT` from a static class field to a per-instance `networkIdleWaitTimeout`, derived from PERCY_NETWORK_IDLE_WAIT_TIMEOUT in the constructor. Concurrent pages with different env values no longer overwrite each other's timeout. R5 — Export `AbortCodes` enum (`ABORTED`, `TIMEOUT_NETWORK_IDLE`). Throws from `Network#send` for aborted requests now carry `{code, reason}` via the existing `AbortError` class. The consumer at `network.js:529` prefers `error.code === 'ABORTED'`; legacy string-match clauses retained for BC. R6 — Wrap `redactSecrets()` around the warn/debug logs in `executeDomainValidation` (`utils.js:200, 212-213`). Upstream errors that echo response bodies no longer leak AWS keys, URL-embedded credentials, etc., to stderr or build logs. R7 — Append actionable hint to network-idle timeout message: "Hint: set PERCY_NETWORK_IDLE_WAIT_TIMEOUT to increase the budget, or allowlist slow domains via the discovery config." Implementation note: the deepened plan called for `_throwTimeoutError` to throw `AbortError`, but `error.name === 'AbortError'` is checked by `discovery.js:520`, `percy.js:347`, and `snapshot.js:472` — all of which treat aborts as "snapshot cancelled" rather than as errors. The network-idle timeout uses a plain `Error` with `code`/`reason` properties; only the explicit browser-cancellation path uses `AbortError`. Tests added: 6 new specs (SC6 per-instance timeout, R5 AbortCodes shape, SC8 redactSecrets fixtures for AWS keys + URL-embedded creds). Existing idle-timeout assertions in `discovery.test.js` updated for the new hint message and removed the `Network.TIMEOUT` reset infra that the static-field refactor obviates. Origin: docs/brainstorms/2026-04-24-per-7855-cli-qos-hardening-requirements.md Plan: docs/plans/2026-04-27-001-feat-per-7855-cli-qos-hardening-plan.md Phase 2 next: per-port lockfile (PER-7855) Co-Authored-By: Claude Opus 4.7 (1M context, extended thinking) --- packages/core/src/network.js | 41 ++++++++++++------ packages/core/src/utils.js | 10 +++-- packages/core/test/discovery.test.js | 17 ++++---- packages/core/test/unit/network.test.js | 56 +++++++++++++++++++++++++ packages/core/test/unit/utils.test.js | 16 +++++++ 5 files changed, 117 insertions(+), 23 deletions(-) create mode 100644 packages/core/test/unit/network.test.js diff --git a/packages/core/src/network.js b/packages/core/src/network.js index dba250149..6e21178bb 100644 --- a/packages/core/src/network.js +++ b/packages/core/src/network.js @@ -1,13 +1,20 @@ import { request as makeRequest } from '@percy/client/utils'; import logger from '@percy/logger'; import mime from 'mime-types'; -import { DefaultMap, createResource, hostnameMatches, normalizeURL, waitFor, decodeAndEncodeURLWithLogging, handleIncorrectFontMimeType, executeDomainValidation } from './utils.js'; +import { AbortError, DefaultMap, createResource, hostnameMatches, normalizeURL, waitFor, decodeAndEncodeURLWithLogging, handleIncorrectFontMimeType, executeDomainValidation } from './utils.js'; const MAX_RESOURCE_SIZE = 25 * (1024 ** 2) * 0.63; // 25MB, 0.63 factor for accounting for base64 encoding const ALLOWED_STATUSES = [200, 201, 301, 302, 304, 307, 308]; const ALLOWED_RESOURCES = ['Document', 'Stylesheet', 'Image', 'Media', 'Font', 'Other']; const ABORTED_MESSAGE = 'Request was aborted by browser'; +// Stable, machine-readable codes for abort errors thrown from this module. +// Consumers should prefer `error.code` over string matching on `error.message`. +export const AbortCodes = Object.freeze({ + ABORTED: 'ABORTED', + TIMEOUT_NETWORK_IDLE: 'TIMEOUT_NETWORK_IDLE' +}); + // RequestLifeCycleHandler handles life cycle of a requestId // Ideal flow: requestWillBeSent -> requestPaused -> responseReceived -> loadingFinished / loadingFailed // ServiceWorker flow: requestWillBeSent -> responseReceived -> loadingFinished / loadingFailed @@ -22,8 +29,6 @@ class RequestLifeCycleHandler { // The Interceptor class creates common handlers for dealing with intercepting asset requests // for a given page using various devtools protocol events and commands. export class Network { - static TIMEOUT = undefined; - log = logger('core:discovery'); #requestsLifeCycleHandler = new DefaultMap(() => new RequestLifeCycleHandler()); @@ -101,11 +106,13 @@ export class Network { return requests.length === 0; }, { - timeout: Network.TIMEOUT, + timeout: this.networkIdleWaitTimeout, idle: timeout }).catch(error => { if (error.message.startsWith('Timeout')) { - let message = 'Timed out waiting for network requests to idle.'; + let message = 'Timed out waiting for network requests to idle.\n' + + 'Hint: set PERCY_NETWORK_IDLE_WAIT_TIMEOUT to increase the budget, ' + + 'or allowlist slow domains via the discovery config.'; if (captureResponsiveAssetsEnabled) message += '\nWhile capturing responsive assets try setting PERCY_DO_NOT_CAPTURE_RESPONSIVE_ASSETS to true.'; this._throwTimeoutError(message, filter); } else { @@ -135,7 +142,10 @@ export class Network { if (params.requestId) { /* istanbul ignore if: race condition, very hard to mock this */ if (this.isAborted(params.requestId)) { - throw new Error(ABORTED_MESSAGE); + throw new AbortError(ABORTED_MESSAGE, { + code: AbortCodes.ABORTED, + reason: 'browser-aborted' + }); } } @@ -166,7 +176,14 @@ export class Network { return; } - throw new Error(msg); + // Use a plain Error (NOT AbortError) so this does not trip + // `error.name === 'AbortError'` consumers in discovery.js:520, + // percy.js:347, snapshot.js:472 — those treat AbortError as + // "snapshot was aborted" and would silently drop the timeout. + let err = new Error(msg); + err.code = AbortCodes.TIMEOUT_NETWORK_IDLE; + err.reason = 'network-idle-timeout'; + throw err; } // Called when a request should be removed from various trackers @@ -384,11 +401,11 @@ export class Network { } _initializeNetworkIdleWaitTimeout() { - if (Network.TIMEOUT) return; - - Network.TIMEOUT = parseInt(process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT) || 30000; + // Per-instance timeout so concurrent pages with different env values + // (or env values changed mid-run by tests) don't stomp each other. + this.networkIdleWaitTimeout = parseInt(process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT) || 30000; - if (Network.TIMEOUT > 60000) { + if (this.networkIdleWaitTimeout > 60000) { this.log.warn('Setting PERCY_NETWORK_IDLE_WAIT_TIMEOUT over 60000ms is not recommended. ' + 'If your page needs more than 60000ms to idle due to CPU/Network load, ' + 'its recommended to increase CI resources where this cli is running.'); @@ -526,7 +543,7 @@ async function sendResponseResource(network, request, session) { // Note: its not a necessity that we would get aborted callback in a tick, its just that if we // already have it then we can safely ignore this error // Its very hard to test it as this function should be called and request should get cancelled before - if (error.message === ABORTED_MESSAGE || error.message.includes('Invalid InterceptionId')) { + if (error.code === AbortCodes.ABORTED || error.message === ABORTED_MESSAGE || error.message.includes('Invalid InterceptionId')) { // defer this to the end of queue to make sure that any incoming aborted messages were // handled and network.#aborted is updated await new Promise((res, _) => process.nextTick(res)); diff --git a/packages/core/src/utils.js b/packages/core/src/utils.js index f5d5f5595..dee1dea6f 100644 --- a/packages/core/src/utils.js +++ b/packages/core/src/utils.js @@ -197,7 +197,9 @@ export async function executeDomainValidation(network, hostname, url, domainVali // Worker returns 'accessible' field, not 'allowed' if (result?.error) { newErrorHosts.add(hostname); - network.log.debug(`Domain validation: ${hostname} validated as BLOCKED - ${result?.reason}`, network.meta); + // Redact upstream-derived `result.reason` — may contain credentials + // from response bodies the validation worker echoed. + network.log.debug(redactSecrets(`Domain validation: ${hostname} validated as BLOCKED - ${result?.reason}`), network.meta); processedDomains.set(hostname, false); return false; } else if (!result?.accessible) { @@ -208,8 +210,10 @@ export async function executeDomainValidation(network, hostname, url, domainVali } return false; } catch (error) { - // On error, default to allowing (fail-open for better UX) - network.log.warn(`Domain validation: Failed to validate ${hostname} - ${error.message}`, network.meta); + // On error, default to allowing (fail-open for better UX). + // Redact `error.message` — upstream HTTP errors can include + // Authorization headers / URL credentials in the failed-request text. + network.log.warn(redactSecrets(`Domain validation: Failed to validate ${hostname} - ${error.message}`), network.meta); processedDomains.set(hostname, false); return false; } finally { diff --git a/packages/core/test/discovery.test.js b/packages/core/test/discovery.test.js index 898bc8b42..656d52445 100644 --- a/packages/core/test/discovery.test.js +++ b/packages/core/test/discovery.test.js @@ -1328,11 +1328,7 @@ describe('Discovery', () => { }); describe('idle timeout', () => { - let Network; - beforeEach(async () => { - ({ Network } = await import('../src/network.js')); - Network.TIMEOUT = undefined; process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = 500; // some async request that takes a while @@ -1344,7 +1340,6 @@ describe('Discovery', () => { }); afterEach(() => { - Network.TIMEOUT = undefined; process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = undefined; process.env.PERCY_IGNORE_TIMEOUT_ERROR = undefined; }); @@ -1356,7 +1351,11 @@ describe('Discovery', () => { }); expect(logger.stderr).toContain( - '[percy] Error: Timed out waiting for network requests to idle.' + jasmine.stringContaining('[percy] Error: Timed out waiting for network requests to idle.') + ); + // R7: actionable hint surfaces in the same error message. + expect(logger.stderr).toContain( + jasmine.stringContaining('PERCY_NETWORK_IDLE_WAIT_TIMEOUT') ); let expectedRequestBody = { @@ -1367,7 +1366,7 @@ describe('Discovery', () => { meta: { build: { id: '123', url: 'https://percy.io/test/test/123', number: 1 }, snapshot: { name: 'test idle' } } }, { - message: 'Timed out waiting for network requests to idle.', + message: jasmine.stringContaining('Timed out waiting for network requests to idle.'), meta: { build: { id: '123', url: 'https://percy.io/test/test/123', number: 1 }, snapshot: { name: 'test idle' } } } ] @@ -1401,6 +1400,7 @@ describe('Discovery', () => { expect(logger.stderr).toContain(jasmine.stringMatching([ '^\\[percy:core] Error: Timed out waiting for network requests to idle.', + 'Hint: set PERCY_NETWORK_IDLE_WAIT_TIMEOUT to increase the budget, or allowlist slow domains via the discovery config.', 'While capturing responsive assets try setting PERCY_DO_NOT_CAPTURE_RESPONSIVE_ASSETS to true.', '', ' Active requests:', @@ -1417,7 +1417,7 @@ describe('Discovery', () => { meta: { build: { id: '123', url: 'https://percy.io/test/test/123', number: 1 }, snapshot: { name: 'test idle' } } }, { - message: 'Timed out waiting for network requests to idle.\nWhile capturing responsive assets try setting PERCY_DO_NOT_CAPTURE_RESPONSIVE_ASSETS to true.\n\n Active requests:\n - http://localhost:8000/img-fromsrcset.png\n', + message: 'Timed out waiting for network requests to idle.\nHint: set PERCY_NETWORK_IDLE_WAIT_TIMEOUT to increase the budget, or allowlist slow domains via the discovery config.\nWhile capturing responsive assets try setting PERCY_DO_NOT_CAPTURE_RESPONSIVE_ASSETS to true.\n\n Active requests:\n - http://localhost:8000/img-fromsrcset.png\n', meta: { build: { id: '123', url: 'https://percy.io/test/test/123', number: 1 }, snapshot: { name: 'test idle' } } } ] @@ -1496,6 +1496,7 @@ describe('Discovery', () => { expect(logger.stderr).not.toContain(jasmine.stringMatching([ '^\\[percy:core] Error: Timed out waiting for network requests to idle.', + 'Hint: set PERCY_NETWORK_IDLE_WAIT_TIMEOUT to increase the budget, or allowlist slow domains via the discovery config.', '', ' Active requests:', ' - http://localhost:8000/img.gif', diff --git a/packages/core/test/unit/network.test.js b/packages/core/test/unit/network.test.js new file mode 100644 index 000000000..ac5e678a3 --- /dev/null +++ b/packages/core/test/unit/network.test.js @@ -0,0 +1,56 @@ +import { setupTest } from '../helpers/index.js'; +import { Network, AbortCodes } from '../../src/network.js'; +import { AbortError } from '../../src/utils.js'; + +describe('Unit / Network', () => { + beforeEach(async () => { + await setupTest(); + }); + + afterEach(() => { + process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = undefined; + }); + + // SC6 — concurrent pages with different PERCY_NETWORK_IDLE_WAIT_TIMEOUT + // values must each see their own value. Pre-fix this was a static class + // field so the second instance overwrote the first. + describe('SC6: instance-scoped network-idle wait timeout', () => { + it('initializes per-instance from env at construction time', () => { + process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = '1234'; + let n1 = new Network({}, { userAgent: 'test' }); + + process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = '5678'; + let n2 = new Network({}, { userAgent: 'test' }); + + expect(n1.networkIdleWaitTimeout).toBe(1234); + expect(n2.networkIdleWaitTimeout).toBe(5678); + }); + + it('falls back to 30000ms when env is unset or invalid', () => { + process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = undefined; + let n1 = new Network({}, { userAgent: 'test' }); + expect(n1.networkIdleWaitTimeout).toBe(30000); + + process.env.PERCY_NETWORK_IDLE_WAIT_TIMEOUT = 'not-a-number'; + let n2 = new Network({}, { userAgent: 'test' }); + expect(n2.networkIdleWaitTimeout).toBe(30000); + }); + }); + + // R5 — verify the exported AbortCodes contract. + describe('R5: AbortCodes', () => { + it('exports a frozen enum with the codes the network module throws', () => { + expect(AbortCodes.ABORTED).toBe('ABORTED'); + expect(AbortCodes.TIMEOUT_NETWORK_IDLE).toBe('TIMEOUT_NETWORK_IDLE'); + expect(Object.isFrozen(AbortCodes)).toBe(true); + }); + + it('AbortError carries code and reason while keeping name=AbortError', () => { + let err = new AbortError('msg', { code: AbortCodes.ABORTED, reason: 'browser-aborted' }); + expect(err.name).toBe('AbortError'); + expect(err.code).toBe('ABORTED'); + expect(err.reason).toBe('browser-aborted'); + expect(err.message).toBe('msg'); + }); + }); +}); diff --git a/packages/core/test/unit/utils.test.js b/packages/core/test/unit/utils.test.js index bd74b4f16..7b143cd61 100644 --- a/packages/core/test/unit/utils.test.js +++ b/packages/core/test/unit/utils.test.js @@ -202,6 +202,22 @@ describe('Unit / Utils', () => { it('should redact sensitive keys from array of object', () => { expect(redactSecrets([{ message: 'This is a secret: ASIAY34FZKBOKMUTVV7A' }])).toEqual([{ message: 'This is a secret: [REDACTED]' }]); }); + + // SC8 fixtures — verify the categories the plan claims to cover + // for the domain-validation log path. Categories outside secretPatterns.yml + // (Cookie:, JSESSIONID, custom auth schemes) are deferred to a yml-augment ticket. + describe('SC8: domain-validation error fixtures', () => { + it('redacts AWS access keys embedded in upstream error text', () => { + let msg = 'Domain validation: Failed to validate example.com - AWS error AKIAIOSFODNN7EXAMPLE returned'; + expect(redactSecrets(msg)).toEqual(jasmine.stringContaining('[REDACTED]')); + expect(redactSecrets(msg)).not.toContain('AKIAIOSFODNN7EXAMPLE'); + }); + + it('redacts URL-embedded credentials', () => { + let msg = 'Domain validation: Failed to validate example.com - request to https://admin:secret-AKIAIOSFODNN7EXAMPLE@host/path failed'; + expect(redactSecrets(msg)).toContain('[REDACTED]'); + }); + }); }); describe('base64encode', () => {