From 80aca084dd94470d03416b5f6a8ec958c2d83986 Mon Sep 17 00:00:00 2001 From: Ahmed Abushagur Date: Mon, 13 Apr 2026 16:41:37 -0700 Subject: [PATCH] =?UTF-8?q?fix(growth):=20security=20hardening=20=E2=80=94?= =?UTF-8?q?=20bun=20-e=20interpolation,=20pkill=20race,=20input=20validati?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes a batch of real security findings filed against growth.sh and reddit-fetch.ts. growth.sh: - Switch all four `bun -e "...${VAR}..."` sites to env-var passing (_VAR="..." bun -e 'process.env._VAR'), per .claude/rules/shell-scripts.md. Closes #3188, #3221, #3223. - Spawn claude under `setsid` so it owns its own process group, and kill the group via `kill -SIG -PGID` instead of racing with pkill -P. Adds a numeric guard on CLAUDE_PID. Closes #3193, #3205. - POST to SPA with Authorization header loaded from a 0600 temp config file (-K) and body from a 0600 temp file instead of here-string, so SPA_TRIGGER_SECRET never appears in ps/cmdline. Closes #3224. - Drop dead REDDIT_JSON=$(cat ...) line. - Extend cleanup trap to also remove CLAUDE_OUTPUT_FILE, SPA_AUTH_FILE, SPA_BODY_FILE. reddit-fetch.ts: - Validate REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET don't contain ':' or CRLF (prevents Basic-auth corruption and header injection). Closes #3198. - Validate REDDIT_USERNAME against Reddit's charset before interpolating into the User-Agent header (prevents CRLF injection). Closes #3207. - Validate Reddit-API-returned author names against the same charset and encodeURIComponent them before interpolating into the /user/ API path (prevents path traversal from a hostile Reddit username). Closes #3202. --- .claude/skills/setup-agent-team/growth.sh | 97 +++++++++++-------- .../skills/setup-agent-team/reddit-fetch.ts | 25 ++++- 2 files changed, 82 insertions(+), 40 deletions(-) diff --git a/.claude/skills/setup-agent-team/growth.sh b/.claude/skills/setup-agent-team/growth.sh index 9dfb7ee77..0d3b0164d 100644 --- a/.claude/skills/setup-agent-team/growth.sh +++ b/.claude/skills/setup-agent-team/growth.sh @@ -33,7 +33,8 @@ cleanup() { local exit_code=$? log "Running cleanup (exit_code=${exit_code})..." - rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" 2>/dev/null || true + rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" \ + "${CLAUDE_OUTPUT_FILE:-}" "${SPA_AUTH_FILE:-}" "${SPA_BODY_FILE:-}" 2>/dev/null || true if [[ -n "${CLAUDE_PID:-}" ]] && kill -0 "${CLAUDE_PID}" 2>/dev/null; then kill -TERM "${CLAUDE_PID}" 2>/dev/null || true fi @@ -64,7 +65,7 @@ if ! bun run "${SCRIPT_DIR}/reddit-fetch.ts" > "${REDDIT_DATA_FILE}" 2>> "${LOG_ exit 1 fi -POST_COUNT=$(bun -e "const d=JSON.parse(await Bun.file('${REDDIT_DATA_FILE}').text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)") +POST_COUNT=$(_DATA_FILE="${REDDIT_DATA_FILE}" bun -e 'const d=JSON.parse(await Bun.file(process.env._DATA_FILE).text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)') log "Phase 1 done: ${POST_COUNT} posts fetched" # --- Phase 2: Score with Claude --- @@ -79,40 +80,50 @@ if [[ ! -f "$PROMPT_TEMPLATE" ]]; then exit 1 fi -# Inject Reddit data into prompt template -REDDIT_JSON=$(cat "${REDDIT_DATA_FILE}") -# Use bun for safe substitution to avoid sed escaping issues with JSON +# Inject Reddit data into prompt template. +# Paths are passed via env vars — never interpolated into the JS string — per +# .claude/rules/shell-scripts.md ("Pass data to bun via environment variables"). DECISIONS_FILE="${HOME}/.config/spawn/growth-decisions.md" -bun -e " -import { existsSync } from 'node:fs'; -const template = await Bun.file('${PROMPT_TEMPLATE}').text(); -const data = await Bun.file('${REDDIT_DATA_FILE}').text(); -const decisionsPath = '${DECISIONS_FILE}'; -const decisions = existsSync(decisionsPath) ? await Bun.file(decisionsPath).text() : 'No past decisions yet.'; +_TEMPLATE="${PROMPT_TEMPLATE}" \ +_DATA_FILE="${REDDIT_DATA_FILE}" \ +_DECISIONS="${DECISIONS_FILE}" \ +_OUT="${PROMPT_FILE}" \ +bun -e ' +import { existsSync } from "node:fs"; +const template = await Bun.file(process.env._TEMPLATE).text(); +const data = await Bun.file(process.env._DATA_FILE).text(); +const decisionsPath = process.env._DECISIONS; +const decisions = existsSync(decisionsPath) ? await Bun.file(decisionsPath).text() : "No past decisions yet."; const result = template - .replace('REDDIT_DATA_PLACEHOLDER', data.trim()) - .replace('DECISIONS_PLACEHOLDER', decisions.trim()); -await Bun.write('${PROMPT_FILE}', result); -" + .replace("REDDIT_DATA_PLACEHOLDER", data.trim()) + .replace("DECISIONS_PLACEHOLDER", decisions.trim()); +await Bun.write(process.env._OUT, result); +' log "Hard timeout: ${HARD_TIMEOUT}s" # Run claude with stream-json to capture text (plain -p stdout is empty with extended thinking) CLAUDE_STREAM_FILE=$(mktemp /tmp/growth-stream-XXXXXX.jsonl) CLAUDE_OUTPUT_FILE=$(mktemp /tmp/growth-output-XXXXXX.txt) -claude -p - --model sonnet --output-format stream-json --verbose < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" & +# Run claude in its own session/process group (setsid) so we can signal the +# whole tree atomically via `kill -SIG -PGID` instead of racing with pkill -P. +setsid claude -p - --model sonnet --output-format stream-json --verbose \ + < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" & CLAUDE_PID=$! -log "Claude started (pid=${CLAUDE_PID})" +log "Claude started (pid=${CLAUDE_PID}, pgid=${CLAUDE_PID})" -# Kill claude and its full process tree +# Kill claude and its full process tree by signalling the process group. +# Guards against empty/non-numeric CLAUDE_PID (defensive — should never happen). kill_claude() { + if [[ -z "${CLAUDE_PID:-}" ]] || ! [[ "${CLAUDE_PID}" =~ ^[0-9]+$ ]]; then + log "kill_claude: CLAUDE_PID is unset or non-numeric, skipping" + return + fi if kill -0 "${CLAUDE_PID}" 2>/dev/null; then - log "Killing claude (pid=${CLAUDE_PID}) and its process tree" - pkill -TERM -P "${CLAUDE_PID}" 2>/dev/null || true - kill -TERM "${CLAUDE_PID}" 2>/dev/null || true + log "Killing claude process group (pgid=${CLAUDE_PID})" + kill -TERM -"${CLAUDE_PID}" 2>/dev/null || true sleep 5 - pkill -KILL -P "${CLAUDE_PID}" 2>/dev/null || true - kill -KILL "${CLAUDE_PID}" 2>/dev/null || true + kill -KILL -"${CLAUDE_PID}" 2>/dev/null || true fi } @@ -133,22 +144,24 @@ done wait "${CLAUDE_PID}" 2>/dev/null CLAUDE_EXIT=$? -# Extract text content from stream-json into plain text output file -bun -e " -const lines = (await Bun.file('${CLAUDE_STREAM_FILE}').text()).split('\n').filter(Boolean); +# Extract text content from stream-json into plain text output file. +_STREAM="${CLAUDE_STREAM_FILE}" \ +_OUT="${CLAUDE_OUTPUT_FILE}" \ +bun -e ' +const lines = (await Bun.file(process.env._STREAM).text()).split("\n").filter(Boolean); const texts = []; for (const line of lines) { try { const ev = JSON.parse(line); - if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) { + if (ev.type === "assistant" && Array.isArray(ev.message?.content)) { for (const block of ev.message.content) { - if (block.type === 'text' && block.text) texts.push(block.text); + if (block.type === "text" && block.text) texts.push(block.text); } } } catch {} } -await Bun.write('${CLAUDE_OUTPUT_FILE}', texts.join('\n')); -" 2>> "${LOG_FILE}" || true +await Bun.write(process.env._OUT, texts.join("\n")); +' 2>> "${LOG_FILE}" || true # Append Claude output to log cat "${CLAUDE_OUTPUT_FILE}" >> "${LOG_FILE}" 2>/dev/null || true @@ -164,15 +177,15 @@ CANDIDATE_JSON="" # Extract the last valid json:candidate block from Claude's output if [[ -f "${CLAUDE_OUTPUT_FILE}" ]]; then - CANDIDATE_JSON=$(bun -e " -const text = await Bun.file('${CLAUDE_OUTPUT_FILE}').text(); -const blocks = [...text.matchAll(/\`\`\`json:candidate\n([\s\S]*?)\n\`\`\`/g)]; -let result = ''; + CANDIDATE_JSON=$(_OUT="${CLAUDE_OUTPUT_FILE}" bun -e ' +const text = await Bun.file(process.env._OUT).text(); +const blocks = [...text.matchAll(/```json:candidate\n([\s\S]*?)\n```/g)]; +let result = ""; for (const block of blocks) { try { result = JSON.stringify(JSON.parse(block[1].trim())); } catch {} } if (result) console.log(result); -" 2>/dev/null) +' 2>/dev/null) fi if [[ -z "${CANDIDATE_JSON}" ]]; then @@ -182,15 +195,23 @@ fi log "Candidate JSON: ${CANDIDATE_JSON}" -# POST to SPA if SPA_TRIGGER_URL is configured +# POST to SPA if SPA_TRIGGER_URL is configured. +# Secret + body are written to 0600 temp files so SPA_TRIGGER_SECRET never +# appears on the curl command line (visible via ps / /proc/*/cmdline). if [[ -n "${SPA_TRIGGER_URL:-}" && -n "${SPA_TRIGGER_SECRET:-}" ]]; then log "Posting candidate to SPA at ${SPA_TRIGGER_URL}/candidate" + SPA_AUTH_FILE=$(mktemp /tmp/growth-auth-XXXXXX.conf) + SPA_BODY_FILE=$(mktemp /tmp/growth-body-XXXXXX.json) + chmod 0600 "${SPA_AUTH_FILE}" "${SPA_BODY_FILE}" + printf 'header = "Authorization: Bearer %s"\n' "${SPA_TRIGGER_SECRET}" > "${SPA_AUTH_FILE}" + printf '%s' "${CANDIDATE_JSON}" > "${SPA_BODY_FILE}" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \ -X POST "${SPA_TRIGGER_URL}/candidate" \ - -H "Authorization: Bearer ${SPA_TRIGGER_SECRET}" \ + -K "${SPA_AUTH_FILE}" \ -H "Content-Type: application/json" \ - --data-binary @- <<< "${CANDIDATE_JSON}" \ + --data-binary @"${SPA_BODY_FILE}" \ --max-time 30) || HTTP_STATUS="000" + rm -f "${SPA_AUTH_FILE}" "${SPA_BODY_FILE}" log "SPA response: HTTP ${HTTP_STATUS}" else log "SPA_TRIGGER_URL or SPA_TRIGGER_SECRET not set, skipping Slack notification" diff --git a/.claude/skills/setup-agent-team/reddit-fetch.ts b/.claude/skills/setup-agent-team/reddit-fetch.ts index f526fceaf..a732e6294 100644 --- a/.claude/skills/setup-agent-team/reddit-fetch.ts +++ b/.claude/skills/setup-agent-team/reddit-fetch.ts @@ -15,13 +15,29 @@ const CLIENT_ID = process.env.REDDIT_CLIENT_ID ?? ""; const CLIENT_SECRET = process.env.REDDIT_CLIENT_SECRET ?? ""; const USERNAME = process.env.REDDIT_USERNAME ?? ""; const PASSWORD = process.env.REDDIT_PASSWORD ?? ""; -const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`; if (!CLIENT_ID || !CLIENT_SECRET || !USERNAME || !PASSWORD) { console.error("Missing Reddit credentials"); process.exit(1); } +// Validate credential format to prevent Basic-auth corruption and header +// injection (colons split the user:pass pair; CR/LF splits HTTP headers). +if (/[:\r\n]/.test(CLIENT_ID) || /[:\r\n]/.test(CLIENT_SECRET)) { + console.error("Invalid REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET: must not contain ':' or newlines"); + process.exit(1); +} + +// Reddit usernames are [A-Za-z0-9_-], 3–20 chars. Reject anything else so the +// User-Agent header can't be CRLF-injected via a hostile env var. +const REDDIT_USERNAME_RE = /^[A-Za-z0-9_-]{1,64}$/; +if (!REDDIT_USERNAME_RE.test(USERNAME)) { + console.error("Invalid REDDIT_USERNAME format"); + process.exit(1); +} + +const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`; + // Subreddits — shuffled each run so we don't always hit the same ones first const SUBREDDITS = shuffle([ "Vibecoding", @@ -199,7 +215,12 @@ function extractPosts(data: unknown): Map { /** Fetch a user's recent comments. */ async function fetchUserComments(token: string, username: string): Promise { if (!username || username === "[deleted]") return []; - const data = await redditGet(token, `/user/${username}/comments?limit=25&sort=new`); + // The author field comes from the Reddit API and is therefore untrusted. + // Reject anything outside Reddit's real username charset to prevent path + // traversal into other API endpoints, and encodeURIComponent as defense in + // depth. + if (!REDDIT_USERNAME_RE.test(username)) return []; + const data = await redditGet(token, `/user/${encodeURIComponent(username)}/comments?limit=25&sort=new`); if (!data || typeof data !== "object") return []; const listing = data as Record; const listingData = listing.data as Record | undefined;