From 6e5bc128c97893823e1c2ebf68301237e6240feb Mon Sep 17 00:00:00 2001 From: autogame-17 <17@evomap.ai> Date: Thu, 11 Jun 2026 02:02:49 +0800 Subject: [PATCH] fix(sanitize): don't reverse-flag path/URL-shaped env values as leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit detectEnvValueLeaks reverse-scans every process.env value and flags any that appears verbatim in content. CI tooling exports many env vars whose value is the repo checkout path: the runner sets GITHUB_WORKSPACE / RUNNER_WORKSPACE, and `npm test` additionally sets INIT_CWD / npm_config_local_prefix / npm_package_json / PWD — all set to /home/runner/work/evolver/evolver. Each is a substring of capsule content that legitimately references the build path, so the reverse scan reported a false-positive leak. This (a) failed test/sanitize.test.js:280 on every CI run while passing locally, and (b) would block every self-PR created from CI over its own runner path. Filesystem paths and URLs are not secrets, so skip path/URL-shaped env values in the reverse scan. Genuine sensitive paths in content are still caught by the local_path pattern scanner and credentialed URLs by db_url / basic_auth — the reverse scan exists for non-pattern-matchable hardcoded secret values, which are never paths/URLs. The regression test sets the runner/npm checkout-path vars and asserts that fullLeakCheck stays clean; it also asserts that a non-path secret value is still reverse-detected, so the security guarantee is locked in. CI on main had been red since at least v1.88.3 (2026-06-06), including on docs-only commits that never touched sanitize — confirming an environmental (env-dependent) failure, not a code regression. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/gep/sanitize.js | 9 +++++++++ test/sanitize.test.js | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/gep/sanitize.js b/src/gep/sanitize.js index 676e8adf..79e1d0f9 100644 --- a/src/gep/sanitize.js +++ b/src/gep/sanitize.js @@ -218,6 +218,15 @@ function detectEnvValueLeaks(content) { for (const [key, val] of Object.entries(process.env)) { if (!val || val.length < 8) continue; if (ENV_SCAN_SKIP_KEYS.has(key)) continue; + // Filesystem paths and URLs are not secrets, and CI tooling exports dozens + // of env vars whose value is the repo checkout path — the runner + // (GITHUB_WORKSPACE, RUNNER_WORKSPACE) and, when tests run via `npm test`, + // npm itself (INIT_CWD, npm_config_local_prefix, npm_package_json, PWD). + // Reverse-flagging them is a false positive whenever capsule content + // legitimately references the build path: it blocks self-PRs and fails only + // under CI. Genuine sensitive paths in content are still caught by the + // local_path pattern scanner, and credentialed URLs by db_url / basic_auth. + if (/^(\/|[A-Za-z]:\\|[a-z][a-z0-9+.-]*:\/\/)/i.test(val)) continue; if (content.includes(val)) { leaks.push({ type: 'env_value_leak', envKey: key, value: val.length > 60 ? val.slice(0, 57) + '...' : val, suggestion: 'process.env.' 
+ key }); } diff --git a/test/sanitize.test.js b/test/sanitize.test.js index 52ac1813..8d5875dd 100644 --- a/test/sanitize.test.js +++ b/test/sanitize.test.js @@ -302,4 +302,39 @@ const ghLegacyNoreply = scanForLeaks('opened by classicuser@users.noreply.github assert.strictEqual(ghLegacyNoreply.found, false, 'scanForLeaks must NOT flag legacy GitHub noreply addresses (any local part)'); -console.log('All sanitize tests passed (68 assertions)'); +// Regression: CI runners + npm populate several env vars with the repo checkout +// path — GITHUB_WORKSPACE / RUNNER_WORKSPACE from the runner, and INIT_CWD / +// npm_config_local_prefix / npm_package_json / PWD from `npm test`. When capsule +// content legitimately references that build path, detectEnvValueLeaks must NOT +// report it as an env-value leak, or every self-PR from CI would be blocked over +// its own build path. This failed only under CI (where these vars are set to +// /home/runner/work/...) until path/URL-shaped env values were skipped. +const RUNNER_PATH = '/home/runner/work/evolver/evolver'; +const SECRET_VAL = 'zzz9988aa77bb66cc55dd'; +const _ciEnvKeys = ['GITHUB_WORKSPACE', 'INIT_CWD', 'npm_config_local_prefix', 'EVOLVER_TEST_SECRET']; +const _savedCiEnv = {}; +for (const k of _ciEnvKeys) _savedCiEnv[k] = process.env[k]; +process.env.GITHUB_WORKSPACE = RUNNER_PATH; +process.env.INIT_CWD = RUNNER_PATH; +process.env.npm_config_local_prefix = RUNNER_PATH; +process.env.EVOLVER_TEST_SECRET = SECRET_VAL; +try { + assert.strictEqual( + fullLeakCheck('build trace from /home/runner/work/evolver/evolver/src/foo.js').found, + false, + 'CI runner/npm checkout-path env vars must NOT be flagged as env-value leaks' + ); + // Security guarantee intact: a non-path secret env value is still reverse-detected. 
+ assert.strictEqual( + fullLeakCheck('config contains ' + SECRET_VAL + ' inline').found, + true, + 'non-path secret env value must still be reverse-detected' + ); +} finally { + for (const k of _ciEnvKeys) { + if (_savedCiEnv[k] === undefined) delete process.env[k]; + else process.env[k] = _savedCiEnv[k]; + } +} + +console.log('All sanitize tests passed (70 assertions)');