From 80aca084dd94470d03416b5f6a8ec958c2d83986 Mon Sep 17 00:00:00 2001
From: Ahmed Abushagur <ahmed@abushagur.com>
Date: Mon, 13 Apr 2026 16:41:37 -0700
Subject: [PATCH] =?UTF-8?q?fix(growth):=20security=20hardening=20=E2=80=94?=
 =?UTF-8?q?=20bun=20-e=20interpolation,=20pkill=20race,=20input=20validati?=
 =?UTF-8?q?on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes a batch of real security findings filed against growth.sh and reddit-fetch.ts.

growth.sh:
- Switch all four `bun -e "...${VAR}..."` sites to env-var passing
  (_VAR="..." bun -e 'process.env._VAR'), per .claude/rules/shell-scripts.md.
  Closes #3188, #3221, #3223.
- Spawn claude under `setsid` so it owns its own process group, and kill the
  group via `kill -SIG -PGID` instead of racing with pkill -P. Adds a numeric
  guard on CLAUDE_PID. Closes #3193, #3205.
- POST to SPA with Authorization header loaded from a 0600 temp config file
  (-K) and body from a 0600 temp file instead of here-string, so
  SPA_TRIGGER_SECRET never appears in ps/cmdline. Closes #3224.
- Drop dead REDDIT_JSON=$(cat ...) line.
- Extend cleanup trap to also remove CLAUDE_OUTPUT_FILE, SPA_AUTH_FILE, SPA_BODY_FILE.

reddit-fetch.ts:
- Validate REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET don't contain ':' or CRLF
  (prevents Basic-auth corruption and header injection). Closes #3198.
- Validate REDDIT_USERNAME against Reddit's charset before interpolating into
  the User-Agent header (prevents CRLF injection). Closes #3207.
- Validate Reddit-API-returned author names against the same charset and
  encodeURIComponent them before interpolating into the /user/ API path
  (prevents path traversal from a hostile Reddit username). Closes #3202.
---
 .claude/skills/setup-agent-team/growth.sh     | 97 +++++++++++--------
 .../skills/setup-agent-team/reddit-fetch.ts   | 25 ++++-
 2 files changed, 82 insertions(+), 40 deletions(-)

diff --git a/.claude/skills/setup-agent-team/growth.sh b/.claude/skills/setup-agent-team/growth.sh
index 9dfb7ee77..0d3b0164d 100644
--- a/.claude/skills/setup-agent-team/growth.sh
+++ b/.claude/skills/setup-agent-team/growth.sh
@@ -33,7 +33,8 @@ cleanup() {
     local exit_code=$?
     log "Running cleanup (exit_code=${exit_code})..."
 
-    rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" 2>/dev/null || true
+    rm -f "${PROMPT_FILE:-}" "${REDDIT_DATA_FILE:-}" "${CLAUDE_STREAM_FILE:-}" \
+          "${CLAUDE_OUTPUT_FILE:-}" "${SPA_AUTH_FILE:-}" "${SPA_BODY_FILE:-}" 2>/dev/null || true
     if [[ -n "${CLAUDE_PID:-}" ]] && kill -0 "${CLAUDE_PID}" 2>/dev/null; then
         kill -TERM "${CLAUDE_PID}" 2>/dev/null || true
     fi
@@ -64,7 +65,7 @@ if ! bun run "${SCRIPT_DIR}/reddit-fetch.ts" > "${REDDIT_DATA_FILE}" 2>> "${LOG_
     exit 1
 fi
 
-POST_COUNT=$(bun -e "const d=JSON.parse(await Bun.file('${REDDIT_DATA_FILE}').text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)")
+POST_COUNT=$(_DATA_FILE="${REDDIT_DATA_FILE}" bun -e 'const d=JSON.parse(await Bun.file(process.env._DATA_FILE).text()); console.log(d.postsScanned ?? d.posts?.length ?? 0)')
 log "Phase 1 done: ${POST_COUNT} posts fetched"
 
 # --- Phase 2: Score with Claude ---
@@ -79,40 +80,50 @@ if [[ ! -f "$PROMPT_TEMPLATE" ]]; then
     exit 1
 fi
 
-# Inject Reddit data into prompt template
-REDDIT_JSON=$(cat "${REDDIT_DATA_FILE}")
-# Use bun for safe substitution to avoid sed escaping issues with JSON
+# Inject Reddit data into prompt template.
+# Paths are passed via env vars — never interpolated into the JS string — per
+# .claude/rules/shell-scripts.md ("Pass data to bun via environment variables").
 DECISIONS_FILE="${HOME}/.config/spawn/growth-decisions.md"
-bun -e "
-import { existsSync } from 'node:fs';
-const template = await Bun.file('${PROMPT_TEMPLATE}').text();
-const data = await Bun.file('${REDDIT_DATA_FILE}').text();
-const decisionsPath = '${DECISIONS_FILE}';
-const decisions = existsSync(decisionsPath) ? await Bun.file(decisionsPath).text() : 'No past decisions yet.';
+_TEMPLATE="${PROMPT_TEMPLATE}" \
+_DATA_FILE="${REDDIT_DATA_FILE}" \
+_DECISIONS="${DECISIONS_FILE}" \
+_OUT="${PROMPT_FILE}" \
+bun -e '
+import { existsSync } from "node:fs";
+const template = await Bun.file(process.env._TEMPLATE).text();
+const data = await Bun.file(process.env._DATA_FILE).text();
+const decisionsPath = process.env._DECISIONS;
+const decisions = existsSync(decisionsPath) ? await Bun.file(decisionsPath).text() : "No past decisions yet.";
 const result = template
-  .replace('REDDIT_DATA_PLACEHOLDER', data.trim())
-  .replace('DECISIONS_PLACEHOLDER', decisions.trim());
-await Bun.write('${PROMPT_FILE}', result);
-"
+  .replace("REDDIT_DATA_PLACEHOLDER", data.trim())
+  .replace("DECISIONS_PLACEHOLDER", decisions.trim());
+await Bun.write(process.env._OUT, result);
+'
 
 log "Hard timeout: ${HARD_TIMEOUT}s"
 
 # Run claude with stream-json to capture text (plain -p stdout is empty with extended thinking)
 CLAUDE_STREAM_FILE=$(mktemp /tmp/growth-stream-XXXXXX.jsonl)
 CLAUDE_OUTPUT_FILE=$(mktemp /tmp/growth-output-XXXXXX.txt)
-claude -p - --model sonnet --output-format stream-json --verbose < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" &
+# Run claude in its own session/process group (setsid) so we can signal the
+# whole tree atomically via `kill -SIG -PGID` instead of racing with pkill -P.
+setsid claude -p - --model sonnet --output-format stream-json --verbose \
+    < "${PROMPT_FILE}" > "${CLAUDE_STREAM_FILE}" 2>> "${LOG_FILE}" &
 CLAUDE_PID=$!
-log "Claude started (pid=${CLAUDE_PID})"
+log "Claude started (pid=${CLAUDE_PID}, pgid=${CLAUDE_PID})"
 
-# Kill claude and its full process tree
+# Kill claude and its full process tree by signalling the process group.
+# Guards against empty/non-numeric CLAUDE_PID (defensive — should never happen).
 kill_claude() {
+    if [[ -z "${CLAUDE_PID:-}" ]] || ! [[ "${CLAUDE_PID}" =~ ^[0-9]+$ ]]; then
+        log "kill_claude: CLAUDE_PID is unset or non-numeric, skipping"
+        return
+    fi
     if kill -0 "${CLAUDE_PID}" 2>/dev/null; then
-        log "Killing claude (pid=${CLAUDE_PID}) and its process tree"
-        pkill -TERM -P "${CLAUDE_PID}" 2>/dev/null || true
-        kill -TERM "${CLAUDE_PID}" 2>/dev/null || true
+        log "Killing claude process group (pgid=${CLAUDE_PID})"
+        kill -TERM -"${CLAUDE_PID}" 2>/dev/null || true
         sleep 5
-        pkill -KILL -P "${CLAUDE_PID}" 2>/dev/null || true
-        kill -KILL "${CLAUDE_PID}" 2>/dev/null || true
+        kill -KILL -"${CLAUDE_PID}" 2>/dev/null || true
     fi
 }
 
@@ -133,22 +144,24 @@ done
 wait "${CLAUDE_PID}" 2>/dev/null
 CLAUDE_EXIT=$?
 
-# Extract text content from stream-json into plain text output file
-bun -e "
-const lines = (await Bun.file('${CLAUDE_STREAM_FILE}').text()).split('\n').filter(Boolean);
+# Extract text content from stream-json into plain text output file.
+_STREAM="${CLAUDE_STREAM_FILE}" \
+_OUT="${CLAUDE_OUTPUT_FILE}" \
+bun -e '
+const lines = (await Bun.file(process.env._STREAM).text()).split("\n").filter(Boolean);
 const texts = [];
 for (const line of lines) {
   try {
     const ev = JSON.parse(line);
-    if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
+    if (ev.type === "assistant" && Array.isArray(ev.message?.content)) {
       for (const block of ev.message.content) {
-        if (block.type === 'text' && block.text) texts.push(block.text);
+        if (block.type === "text" && block.text) texts.push(block.text);
       }
     }
   } catch {}
 }
-await Bun.write('${CLAUDE_OUTPUT_FILE}', texts.join('\n'));
-" 2>> "${LOG_FILE}" || true
+await Bun.write(process.env._OUT, texts.join("\n"));
+' 2>> "${LOG_FILE}" || true
 
 # Append Claude output to log
 cat "${CLAUDE_OUTPUT_FILE}" >> "${LOG_FILE}" 2>/dev/null || true
@@ -164,15 +177,15 @@ CANDIDATE_JSON=""
 
 # Extract the last valid json:candidate block from Claude's output
 if [[ -f "${CLAUDE_OUTPUT_FILE}" ]]; then
-    CANDIDATE_JSON=$(bun -e "
-const text = await Bun.file('${CLAUDE_OUTPUT_FILE}').text();
-const blocks = [...text.matchAll(/\`\`\`json:candidate\n([\s\S]*?)\n\`\`\`/g)];
-let result = '';
+    CANDIDATE_JSON=$(_OUT="${CLAUDE_OUTPUT_FILE}" bun -e '
+const text = await Bun.file(process.env._OUT).text();
+const blocks = [...text.matchAll(/```json:candidate\n([\s\S]*?)\n```/g)];
+let result = "";
 for (const block of blocks) {
   try { result = JSON.stringify(JSON.parse(block[1].trim())); } catch {}
 }
 if (result) console.log(result);
-" 2>/dev/null)
+' 2>/dev/null)
 fi
 
 if [[ -z "${CANDIDATE_JSON}" ]]; then
@@ -182,15 +195,23 @@ fi
 
 log "Candidate JSON: ${CANDIDATE_JSON}"
 
-# POST to SPA if SPA_TRIGGER_URL is configured
+# POST to SPA if SPA_TRIGGER_URL is configured.
+# Secret + body are written to 0600 temp files so SPA_TRIGGER_SECRET never
+# appears on the curl command line (visible via ps / /proc/*/cmdline).
 if [[ -n "${SPA_TRIGGER_URL:-}" && -n "${SPA_TRIGGER_SECRET:-}" ]]; then
     log "Posting candidate to SPA at ${SPA_TRIGGER_URL}/candidate"
+    SPA_AUTH_FILE=$(mktemp /tmp/growth-auth-XXXXXX.conf)
+    SPA_BODY_FILE=$(mktemp /tmp/growth-body-XXXXXX.json)
+    chmod 0600 "${SPA_AUTH_FILE}" "${SPA_BODY_FILE}"
+    printf 'header = "Authorization: Bearer %s"\n' "${SPA_TRIGGER_SECRET}" > "${SPA_AUTH_FILE}"
+    printf '%s' "${CANDIDATE_JSON}" > "${SPA_BODY_FILE}"
     HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
         -X POST "${SPA_TRIGGER_URL}/candidate" \
-        -H "Authorization: Bearer ${SPA_TRIGGER_SECRET}" \
+        -K "${SPA_AUTH_FILE}" \
         -H "Content-Type: application/json" \
-        --data-binary @- <<< "${CANDIDATE_JSON}" \
+        --data-binary @"${SPA_BODY_FILE}" \
         --max-time 30) || HTTP_STATUS="000"
+    rm -f "${SPA_AUTH_FILE}" "${SPA_BODY_FILE}"
     log "SPA response: HTTP ${HTTP_STATUS}"
 else
     log "SPA_TRIGGER_URL or SPA_TRIGGER_SECRET not set, skipping Slack notification"
diff --git a/.claude/skills/setup-agent-team/reddit-fetch.ts b/.claude/skills/setup-agent-team/reddit-fetch.ts
index f526fceaf..a732e6294 100644
--- a/.claude/skills/setup-agent-team/reddit-fetch.ts
+++ b/.claude/skills/setup-agent-team/reddit-fetch.ts
@@ -15,13 +15,29 @@ const CLIENT_ID = process.env.REDDIT_CLIENT_ID ?? "";
 const CLIENT_SECRET = process.env.REDDIT_CLIENT_SECRET ?? "";
 const USERNAME = process.env.REDDIT_USERNAME ?? "";
 const PASSWORD = process.env.REDDIT_PASSWORD ?? "";
-const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`;
 
 if (!CLIENT_ID || !CLIENT_SECRET || !USERNAME || !PASSWORD) {
   console.error("Missing Reddit credentials");
   process.exit(1);
 }
 
+// Validate credential format to prevent Basic-auth corruption and header
+// injection (colons split the user:pass pair; CR/LF splits HTTP headers).
+if (/[:\r\n]/.test(CLIENT_ID) || /[:\r\n]/.test(CLIENT_SECRET)) {
+  console.error("Invalid REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET: must not contain ':' or newlines");
+  process.exit(1);
+}
+
+// Reddit usernames are [A-Za-z0-9_-], 3–20 chars. Reject anything else so the
+// User-Agent header can't be CRLF-injected via a hostile env var.
+const REDDIT_USERNAME_RE = /^[A-Za-z0-9_-]{1,64}$/;
+if (!REDDIT_USERNAME_RE.test(USERNAME)) {
+  console.error("Invalid REDDIT_USERNAME format");
+  process.exit(1);
+}
+
+const USER_AGENT = `spawn-growth:v1.0.0 (by /u/${USERNAME})`;
+
 // Subreddits — shuffled each run so we don't always hit the same ones first
 const SUBREDDITS = shuffle([
   "Vibecoding",
@@ -199,7 +215,12 @@ function extractPosts(data: unknown): Map<string, RedditPost> {
 /** Fetch a user's recent comments. */
 async function fetchUserComments(token: string, username: string): Promise<string[]> {
   if (!username || username === "[deleted]") return [];
-  const data = await redditGet(token, `/user/${username}/comments?limit=25&sort=new`);
+  // The author field comes from the Reddit API and is therefore untrusted.
+  // Reject anything outside Reddit's real username charset to prevent path
+  // traversal into other API endpoints, and encodeURIComponent as defense in
+  // depth.
+  if (!REDDIT_USERNAME_RE.test(username)) return [];
+  const data = await redditGet(token, `/user/${encodeURIComponent(username)}/comments?limit=25&sort=new`);
   if (!data || typeof data !== "object") return [];
   const listing = data as Record<string, unknown>;
   const listingData = listing.data as Record<string, unknown> | undefined;