Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 205 additions & 0 deletions .github/workflows/review-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,208 @@ jobs:
name: pr-review-feedback
path: feedback/
retention-days: 90

# ==========================================================================
# REPLY TO FEEDBACK
# Responds directly in the PR review thread when a user replies to an agent
# comment. Runs in parallel with capture-feedback — this job handles the
# synchronous conversational reply, while capture-feedback saves the artifact
# for async learning on the next review run (resilient fallback).
# ==========================================================================
reply-to-feedback:
if: |
github.event_name == 'pull_request_review_comment' &&
github.event.comment.in_reply_to_id &&
github.event.comment.user.type != 'Bot'
runs-on: ubuntu-latest
env:
HAS_APP_SECRETS: ${{ secrets.CAGENT_REVIEWER_APP_ID != '' }}

steps:
- name: Check if reply is to agent comment
id: check
shell: bash
env:
GH_TOKEN: ${{ github.token }}
PARENT_ID: ${{ github.event.comment.in_reply_to_id }}
REPO: ${{ github.repository }}
run: |
if [ -z "$PARENT_ID" ]; then
echo "is_agent=false" >> $GITHUB_OUTPUT
echo "⏭️ Not a reply comment, skipping"
exit 0
fi

parent=$(gh api "repos/$REPO/pulls/comments/$PARENT_ID") || {
echo "::warning::Failed to fetch parent comment $PARENT_ID" >&2
echo "is_agent=false" >> $GITHUB_OUTPUT
exit 0
}
# Validate required fields exist before extracting
if ! echo "$parent" | jq -e '.user.type and .body' > /dev/null 2>&1; then
echo "::warning::Parent comment has unexpected structure" >&2
echo "is_agent=false" >> $GITHUB_OUTPUT
exit 0
fi
body=$(echo "$parent" | jq -r '.body')
parent_user_type=$(echo "$parent" | jq -r '.user.type')

# Defense-in-depth: verify the root comment was posted by a Bot (agent) AND
# contains the review marker but NOT the reply marker (substring overlap).
# The user.type check prevents matching human comments that happen to contain
# the marker text (e.g., in discussions about the review system).
if [ "$parent_user_type" = "Bot" ] && \
echo "$body" | grep -q "<!-- cagent-review -->" && \
! echo "$body" | grep -q "<!-- cagent-review-reply -->"; then
echo "is_agent=true" >> $GITHUB_OUTPUT
echo "root_comment_id=$PARENT_ID" >> $GITHUB_OUTPUT

# Extract file path and line from the root comment for context
echo "file_path=$(echo "$parent" | jq -r '.path // ""')" >> $GITHUB_OUTPUT
echo "line=$(echo "$parent" | jq -r '.line // .original_line // ""')" >> $GITHUB_OUTPUT
echo "✅ Reply is to an agent review comment"
else
echo "is_agent=false" >> $GITHUB_OUTPUT
echo "⏭️ Not a reply to agent comment, skipping"
fi

- name: Check authorization
if: steps.check.outputs.is_agent == 'true'
id: auth
shell: bash
env:
AUTHOR_ASSOCIATION: ${{ github.event.comment.author_association }}
run: |
case "$AUTHOR_ASSOCIATION" in
OWNER|MEMBER|COLLABORATOR)
echo "authorized=true" >> $GITHUB_OUTPUT
echo "✅ Author is $AUTHOR_ASSOCIATION — authorized to trigger reply"
;;
*)
echo "authorized=false" >> $GITHUB_OUTPUT
echo "⏭️ Author is $AUTHOR_ASSOCIATION — not authorized for reply"
;;
esac

- name: Build thread context
if: steps.check.outputs.is_agent == 'true' && steps.auth.outputs.authorized == 'true'
id: thread
shell: bash
env:
GH_TOKEN: ${{ github.token }}
ROOT_ID: ${{ steps.check.outputs.root_comment_id }}
PR_NUMBER: ${{ github.event.pull_request.number }}
REPO: ${{ github.repository }}
FILE_PATH: ${{ steps.check.outputs.file_path }}
LINE: ${{ steps.check.outputs.line }}
# The triggering comment from the webhook payload — guaranteed fresh,
# unlike the API which may have eventual consistency lag.
TRIGGER_COMMENT_BODY: ${{ github.event.comment.body }}
TRIGGER_COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
TRIGGER_COMMENT_ID: ${{ github.event.comment.id }}
run: |
# Fetch the root comment (fail early if the API call errors)
root=$(gh api "repos/$REPO/pulls/comments/$ROOT_ID") || {
echo "::error::Failed to fetch root comment $ROOT_ID" >&2
exit 1
}
root_body=$(echo "$root" | jq -r '.body // ""')

# Fetch all review comments on this PR and filter to this thread.
# Uses --paginate to handle PRs with >100 review comments.
# Each page is processed by jq independently, then merged with jq -s.
# Note: the triggering comment may not appear here due to eventual
# consistency, so we append it from the webhook payload below.
all_comments=$(gh api --paginate "repos/$REPO/pulls/$PR_NUMBER/comments" \
--jq "[.[] | select(.in_reply_to_id == $ROOT_ID)]" | jq -s 'add // [] | sort_by(.created_at)') || {
echo "::error::Failed to fetch thread comments for PR $PR_NUMBER" >&2
exit 1
}

# Build the thread context and save as step output.
# Use a randomized delimiter to prevent comment body content from
# colliding with the GITHUB_OUTPUT heredoc terminator.
DELIM="THREAD_CONTEXT_$(openssl rand -hex 8)"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LOW: Heredoc delimiter collision risk

While the heredoc delimiter uses openssl rand -hex 8 (2^64 combinations), the TRIGGER_COMMENT_BODY variable is inserted without sanitization. A malicious developer could intentionally craft a comment containing the delimiter pattern to prematurely terminate the heredoc, potentially truncating output or exposing subsequent commands.

Recommendation: Sanitize user input or use a fixed, non-guessable delimiter:

DELIM="EOF_$(uuidgen | tr -d -)_EOF"

Or escape/filter the comment body before embedding it.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 LOW: Potential heredoc injection via TRIGGER_COMMENT_BODY (severity downgraded after verification)

The code uses a randomized heredoc delimiter (DELIM="THREAD_CONTEXT_$(openssl rand -hex 8)") to prevent injection when writing user-controlled comment bodies into GITHUB_OUTPUT. While the 16-character cryptographically random delimiter makes exploitation extremely difficult (attacker would need to predict the value in advance), safer quoting strategies exist.

Current approach (adequate):

DELIM="THREAD_CONTEXT_$(openssl rand -hex 8)"
echo "$TRIGGER_COMMENT_BODY"

More robust approach:

printf '%s\n' "$TRIGGER_COMMENT_BODY"  # or use $'...' quoting

This is a defense-in-depth suggestion rather than a critical fix — the randomized delimiter is an effective practical defense.


{
echo "prompt<<$DELIM"
echo "A developer replied to your review comment. Read the thread context below and respond"
echo "in the same thread."
echo ""
echo "---"
echo "REPO=$REPO"
echo "PR_NUMBER=$PR_NUMBER"
echo "ROOT_COMMENT_ID=$ROOT_ID"
echo "FILE_PATH=$FILE_PATH"
echo "LINE=$LINE"
echo ""
echo "[ORIGINAL REVIEW COMMENT]"
echo "$root_body"
echo ""

# Add earlier replies from the API (excludes the triggering comment
# to avoid duplication if the API already has it)
reply_count=$(echo "$all_comments" | jq 'length')
if [ "$reply_count" -gt 0 ]; then
for i in $(seq 0 $((reply_count - 1))); do
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 MEDIUM: Bot replies not filtered from thread context, could cause agent to respond to itself

The workflow checks github.event.comment.user.type != 'Bot' at the job level, which correctly filters the triggering comment. However, when building thread context, the code iterates through all_comments and includes all of them without filtering bot comments:

for i in $(seq 0 $((reply_count - 1))); do
  comment_id=$(echo "$all_comments" | jq -r ".[$i].id") || continue
  # ... adds comment to thread context without checking user.type
done

If the agent (or any bot) has previously replied in the thread, those bot replies will be included in the thread context. This could cause the agent to read and respond to its own previous replies, creating confusion in multi-turn conversations.

Suggested fix: Filter bot comments when building the thread context:

user_type=$(echo "$all_comments" | jq -r ".[$i].user.type") || continue
if [ "$user_type" = "Bot" ]; then
  continue
fi

comment_id=$(echo "$all_comments" | jq -r ".[$i].id") || continue
# Skip the triggering comment — we append it from the payload below
if [ "$comment_id" = "$TRIGGER_COMMENT_ID" ]; then
continue
fi
# Skip bot replies to avoid the agent responding to its own previous replies
user_type=$(echo "$all_comments" | jq -r ".[$i].user.type") || continue
if [ "$user_type" = "Bot" ]; then
continue
fi
author=$(echo "$all_comments" | jq -r ".[$i].user.login") || continue
body=$(echo "$all_comments" | jq -r ".[$i].body") || continue
echo "[REPLY by @$author]"
echo "$body"
echo ""
done
fi

# Always append the triggering comment last — sourced directly from
# the webhook payload so it's guaranteed to be present.
echo "[REPLY by @$TRIGGER_COMMENT_AUTHOR] ← this is the reply you are responding to"
echo "$TRIGGER_COMMENT_BODY"
echo ""
echo "$DELIM"
} >> $GITHUB_OUTPUT

echo "✅ Built thread context with replies (triggering comment from webhook payload)"

# Safe to checkout PR head because the reply agent only READS files (no code execution)
- name: Checkout PR head
if: steps.check.outputs.is_agent == 'true' && steps.auth.outputs.authorized == 'true'
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
ref: refs/pull/${{ github.event.pull_request.number }}/head

# Generate GitHub App token for custom app identity (optional - falls back to github.token)
- name: Generate GitHub App token
if: steps.check.outputs.is_agent == 'true' && steps.auth.outputs.authorized == 'true' && env.HAS_APP_SECRETS == 'true'
id: app-token
continue-on-error: true
uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a # v2
with:
app_id: ${{ secrets.CAGENT_REVIEWER_APP_ID }}
private_key: ${{ secrets.CAGENT_REVIEWER_APP_PRIVATE_KEY }}

- name: Run reply
if: steps.check.outputs.is_agent == 'true' && steps.auth.outputs.authorized == 'true'
continue-on-error: true
uses: docker/cagent-action/review-pr/reply@latest
with:
thread-context: ${{ steps.thread.outputs.prompt }}
comment-id: ${{ github.event.comment.id }}
anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
openai-api-key: ${{ secrets.OPENAI_API_KEY }}
google-api-key: ${{ secrets.GOOGLE_API_KEY }}
aws-bearer-token-bedrock: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
xai-api-key: ${{ secrets.XAI_API_KEY }}
nebius-api-key: ${{ secrets.NEBIUS_API_KEY }}
mistral-api-key: ${{ secrets.MISTRAL_API_KEY }}
github-token: ${{ steps.app-token.outputs.token || github.token }}
Loading
Loading