diff --git a/.github/scripts/post_autogen_docs_slack.py b/.github/scripts/post_autogen_docs_slack.py
new file mode 100644
--- /dev/null
+++ b/.github/scripts/post_autogen_docs_slack.py
@@ -0,0 +1,457 @@
+#!/usr/bin/env python3
+"""Post a reviewer summary for an autogen-docs PR to Slack.
+
+Deterministic second half of the Autogen Docs Slack Notify workflow.
+The Claude step (claude-code-action) only COMPOSES the message content
+and writes it to a JSON file; it never holds the Slack token. This
+script is the ONLY place the Slack token lives, and it only ever
+contacts slack.com.
+
+It reads the JSON content file, resolves each reviewer's GitHub login to
+a Slack user id (falling back to the literal @handle when no confident
+match exists), builds one Slack mrkdwn message, and posts it via
+chat.postMessage.
+
+Everything read from the content file is UNTRUSTED (it is written by a
+prompt-injectable step), so every string taken from it is escaped for
+Slack mrkdwn before it is placed in the message; only this script may
+emit <@ID> mentions or <url|text> links.
+
+Reviewer resolution reads the workspace's custom "GitHub handle" profile
+field reliably: it first discovers that field's id once via
+team.profile.get, then enumerates members via users.list (used only for
+the id list) and reads each member's custom field value via
+users.profile.get. Slack's bulk users.list does NOT reliably return
+custom profile fields, so the per-user users.profile.get call is required
+to read them.
+
+Python 3 standard library only.
+"""
+
+import json
+import os
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+
+# Hardcoded so this script can ONLY ever talk to Slack. Do not make this
+# configurable from the environment / message file (defense in depth:
+# the message content is composed by a prompt-injectable Claude step).
+SLACK_BASE_URL = "https://slack.com"
+
+# Socket timeout, in seconds, applied to every Slack request so a stalled
+# connection fails fast instead of hanging until the job-level timeout.
+HTTP_TIMEOUT_SECONDS = 30
+
+# Upper bound, in seconds, on any single Retry-After sleep so one HTTP 429
+# cannot stall the job for an arbitrarily long time.
+MAX_RETRY_AFTER_SECONDS = 60
+
+# URL prefixes tolerated in front of a GitHub login in a profile field.
+GITHUB_URL_PREFIXES = (
+    "https://github.com/",
+    "http://github.com/",
+    "https://www.github.com/",
+    "http://www.github.com/",
+    "www.github.com/",
+    "github.com/",
+)
+
+
+def fail(message):
+    """Print an error and exit non-zero so the workflow step fails."""
+    print(message, file=sys.stderr)
+    sys.exit(1)
+
+
+def require_env(name):
+    """Return the value of a required env var, failing when unset/empty."""
+    value = os.environ.get(name)
+    if not value:
+        fail("Missing required environment variable: {}".format(name))
+    return value
+
+
+def load_message(path):
+    """Load and minimally validate the JSON content file from step 1."""
+    try:
+        with open(path, "r", encoding="utf-8") as handle:
+            data = json.load(handle)
+    except FileNotFoundError:
+        fail("Message file not found: {}".format(path))
+    except (OSError, ValueError) as exc:
+        fail("Could not read/parse message file {}: {}".format(path, exc))
+
+    if not isinstance(data, dict):
+        fail("Message file {} did not contain a JSON object".format(path))
+    return data
+
+
+def _as_dict(value):
+    """Return value if it is a dict, else {} (Slack may send null)."""
+    return value if isinstance(value, dict) else {}
+
+
+def _open_json(request):
+    """Send a prepared request to Slack and return the parsed JSON body."""
+    with urllib.request.urlopen(
+        request, timeout=HTTP_TIMEOUT_SECONDS
+    ) as response:
+        return json.loads(response.read().decode("utf-8"))
+
+
+def slack_get(token, method, params):
+    """GET a Slack Web API method and return the parsed JSON response."""
+    url = "{}/api/{}?{}".format(
+        SLACK_BASE_URL, method, urllib.parse.urlencode(params)
+    )
+    request = urllib.request.Request(url, method="GET")
+    request.add_header("Authorization", "Bearer {}".format(token))
+    return _open_json(request)
+
+
+def slack_get_with_retry(token, method, params, max_retries=3,
+                         default_retry_after=3):
+    """GET a Slack Web API method, honoring HTTP 429 rate limits.
+
+    On HTTP 429 the Retry-After response header (seconds) is respected,
+    clamped to 1..MAX_RETRY_AFTER_SECONDS; if it is missing or
+    unparseable a small default delay is used. The same call is retried
+    up to max_retries times; any other HTTP error, or a 429 once the
+    retries are used up, is re-raised. Returns the parsed JSON response
+    (which may still carry "ok": false for non-429 API errors).
+    """
+    attempt = 0
+    while True:
+        try:
+            return slack_get(token, method, params)
+        except urllib.error.HTTPError as exc:
+            if exc.code != 429 or attempt >= max_retries:
+                raise
+            retry_after = default_retry_after
+            header = exc.headers.get("Retry-After")
+            if header:
+                try:
+                    retry_after = int(header)
+                except (TypeError, ValueError):
+                    retry_after = default_retry_after
+            time.sleep(min(max(retry_after, 1), MAX_RETRY_AFTER_SECONDS))
+            attempt += 1
+
+
+def get_or_warn(token, method, params, consequence):
+    """GET with 429 retry, returning the response only when it is usable.
+
+    Any transport failure (HTTP error, unreachable host, timeout),
+    unparseable body or "ok": false answer is reported as a warning on
+    stderr that ends with `consequence`, and None is returned so the
+    caller can degrade gracefully instead of crashing the step.
+    """
+    subject = method
+    if "user" in params:
+        subject = "{} for {}".format(method, params["user"])
+    try:
+        response = slack_get_with_retry(token, method, params)
+    except (OSError, ValueError) as exc:
+        # URLError/HTTPError and socket timeouts are OSError subclasses;
+        # JSON and UTF-8 decoding errors are ValueError subclasses.
+        reason = exc
+    else:
+        if isinstance(response, dict) and response.get("ok"):
+            return response
+        reason = _as_dict(response).get("error") or "unexpected response"
+    print(
+        "Warning: {} failed ({}); {}".format(subject, reason, consequence),
+        file=sys.stderr,
+    )
+    return None
+
+
+def slack_post_json(token, method, payload):
+    """POST JSON to a Slack Web API method and return the parsed JSON."""
+    url = "{}/api/{}".format(SLACK_BASE_URL, method)
+    body = json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(url, data=body, method="POST")
+    request.add_header("Authorization", "Bearer {}".format(token))
+    request.add_header(
+        "Content-type", "application/json; charset=utf-8"
+    )
+    return _open_json(request)
+
+
+def normalize_handle(value):
+    """Normalize a possible GitHub handle for comparison.
+
+    Lowercases, strips a leading "@", and reduces a full
+    https://github.com/<login> URL form to just <login>. Anything that
+    is not a string normalizes to "".
+    """
+    if not isinstance(value, str):
+        return ""
+    candidate = value.strip().lower()
+    # Tolerate a full GitHub profile URL form.
+    for prefix in GITHUB_URL_PREFIXES:
+        if candidate.startswith(prefix):
+            candidate = candidate[len(prefix):]
+            break
+    # Drop a trailing slash and anything after it (e.g. URL path tail).
+    candidate = candidate.split("/", 1)[0]
+    if candidate.startswith("@"):
+        candidate = candidate[1:]
+    return candidate
+
+
+def escape_mrkdwn(text):
+    """Escape the characters Slack treats as message control characters.
+
+    Slack parses <...> sequences as mentions, channel-wide pings and
+    links, so untrusted text must have "&", "<" and ">" replaced by
+    their entities before it goes into a message. "&" is replaced first
+    so the entities produced for "<" and ">" are not escaped twice.
+    """
+    escaped = str(text).replace("&", "&amp;")
+    return escaped.replace("<", "&lt;").replace(">", "&gt;")
+
+
+def find_github_field_id(token):
+    """Find the workspace custom-profile field id for the GitHub handle.
+
+    Uses team.profile.get, whose response carries profile.fields: a list
+    of field definitions each with id (e.g. "Xf0..."), label and hint.
+    Selects the field whose label, lowercased, equals or contains
+    "github" (so "GitHub Handle", "GitHub", "Github Username" all match),
+    preferring an exact-ish "github handle"/"github" label when several
+    match. Returns the field id, or None (with a warning) when the call
+    fails or no field matches, so the caller degrades gracefully to
+    plain @handle text.
+    """
+    response = get_or_warn(
+        token, "team.profile.get", {}, "falling back to plain @handles"
+    )
+    if response is None:
+        return None
+
+    fields = _as_dict(response.get("profile")).get("fields")
+    if not isinstance(fields, list):
+        print(
+            "Warning: team.profile.get returned no profile.fields; "
+            "falling back to plain @handles",
+            file=sys.stderr,
+        )
+        return None
+
+    candidates = []
+    for entry in fields:
+        if not isinstance(entry, dict):
+            continue
+        field_id = entry.get("id")
+        label = entry.get("label")
+        if not field_id or not isinstance(label, str):
+            continue
+        label_lower = label.strip().lower()
+        if "github" in label_lower:
+            candidates.append((field_id, label_lower))
+
+    if not candidates:
+        print(
+            "Warning: no custom profile field label matches 'github'; "
+            "falling back to plain @handles",
+            file=sys.stderr,
+        )
+        return None
+
+    # Prefer an exact-ish "github handle"/"github" label.
+    for field_id, label_lower in candidates:
+        if label_lower in ("github handle", "github"):
+            return field_id
+
+    if len(candidates) > 1:
+        print(
+            "Warning: multiple custom profile fields match 'github' "
+            "({}); using the first one ({}).".format(
+                ", ".join(label for _, label in candidates),
+                candidates[0][1],
+            ),
+            file=sys.stderr,
+        )
+    return candidates[0][0]
+
+
+def list_member_ids(token):
+    """Return all non-deleted, non-bot member ids, or None on failure.
+
+    Paginates users.list to the end. users.list is used only for the
+    member id list here; its bulk response does not reliably include
+    custom profile fields, so those are read per-user via
+    users.profile.get instead. None (rather than a partial list) is
+    returned when any page fails: a handle that looks unique in a
+    partial list could still be duplicated among the unseen members.
+    """
+    member_ids = []
+    cursor = ""
+    while True:
+        params = {"limit": 200}
+        if cursor:
+            params["cursor"] = cursor
+        response = get_or_warn(
+            token, "users.list", params, "falling back to plain @handles"
+        )
+        if response is None:
+            return None
+
+        members = response.get("members")
+        for member in members if isinstance(members, list) else []:
+            if not isinstance(member, dict):
+                continue
+            if member.get("deleted") or member.get("is_bot"):
+                continue
+            user_id = member.get("id")
+            if user_id:
+                member_ids.append(user_id)
+
+        cursor = _as_dict(response.get("response_metadata")).get(
+            "next_cursor"
+        )
+        if not cursor:
+            return member_ids
+
+
+def iter_member_ids(token):
+    """Yield non-deleted, non-bot member ids (nothing if listing failed)."""
+    yield from list_member_ids(token) or []
+
+
+def build_login_to_slack_id(token):
+    """Build a map of normalized GitHub handle -> Slack user id.
+
+    Discovers the GitHub-handle custom-profile field id once via
+    team.profile.get, enumerates member ids via users.list, then reads
+    each member's value for that field via users.profile.get. A handle
+    that maps to more than one distinct member is treated as ambiguous
+    and dropped (the caller then falls back to @handle text). Returns an
+    empty map when the field or the member list cannot be obtained.
+    """
+    field_id = find_github_field_id(token)
+    if not field_id:
+        return {}
+    member_ids = list_member_ids(token)
+    if member_ids is None:
+        return {}
+
+    matches = {}
+    ambiguous = set()
+    for user_id in member_ids:
+        # Minimal politeness delay between per-user profile reads.
+        time.sleep(0.05)
+        response = get_or_warn(
+            token, "users.profile.get", {"user": user_id}, "skipping"
+        )
+        if response is None:
+            continue
+
+        fields = _as_dict(response.get("profile")).get("fields")
+        entry = _as_dict(fields).get(field_id)
+        if not isinstance(entry, dict):
+            continue
+        handle = normalize_handle(entry.get("value"))
+        if not handle:
+            continue
+        # setdefault records the first owner; a different later owner
+        # marks the handle ambiguous.
+        if matches.setdefault(handle, user_id) != user_id:
+            ambiguous.add(handle)
+
+    for handle in ambiguous:
+        matches.pop(handle, None)
+    return matches
+
+
+def reviewer_tag(login, login_to_slack_id):
+    """Return a Slack tag for a reviewer: <@ID> if uniquely resolved,
+    else the literal @login text (escaped). Never guesses an id."""
+    slack_id = login_to_slack_id.get(normalize_handle(login))
+    if slack_id:
+        return "<@{}>".format(slack_id)
+    return "@{}".format(escape_mrkdwn(str(login).strip().lstrip("@")))
+
+
+def build_message(content, pr_url, login_to_slack_id):
+    """Build the single Slack mrkdwn message string.
+
+    All text from `content` is untrusted and is escaped; pr_url comes
+    from the workflow's own environment and is used as the link target.
+    """
+    # Collapse whitespace so a newline cannot break out of the link label.
+    headline = " ".join(str(content.get("headline") or "").split())
+    headline = escape_mrkdwn(headline or "Documentation update")
+    lines = ["<{}|{}>".format(pr_url, headline)]
+
+    bullets = content.get("summary_bullets")
+    if isinstance(bullets, list):
+        for bullet in bullets:
+            if isinstance(bullet, str) and bullet.strip():
+                lines.append("• {}".format(escape_mrkdwn(bullet.strip())))
+
+    reviewer_lines = []
+    reviewers = content.get("reviewers")
+    for reviewer in reviewers if isinstance(reviewers, list) else []:
+        if not isinstance(reviewer, dict):
+            continue
+        login = reviewer.get("login")
+        if not login:
+            continue
+        tag = reviewer_tag(login, login_to_slack_id)
+        note = escape_mrkdwn(str(reviewer.get("note") or "").strip())
+        if note:
+            reviewer_lines.append("{} — {}".format(tag, note))
+        else:
+            reviewer_lines.append(tag)
+
+    # Only emit the section header when at least one reviewer is listed.
+    if reviewer_lines:
+        lines.extend(["", "*Reviewers*"])
+        lines.extend(reviewer_lines)
+    return "\n".join(lines)
+
+
+def main():
+    """Read the workflow's env vars, build the message and post it."""
+    token = require_env("SLACK_BOT_TOKEN")
+    channel = require_env("DOCS_SLACK_CHANNEL_ID")
+    message_file = require_env("MESSAGE_FILE")
+    # The PR url always comes from the trusted env var; any "pr_url" in
+    # the untrusted content file is deliberately ignored.
+    pr_url = require_env("PR_URL")
+
+    content = load_message(message_file)
+
+    # Resolving handles costs one API call per workspace member, so skip
+    # it entirely when the content names no reviewer.
+    reviewers = content.get("reviewers")
+    login_to_slack_id = {}
+    if isinstance(reviewers, list) and any(
+        isinstance(item, dict) and item.get("login") for item in reviewers
+    ):
+        login_to_slack_id = build_login_to_slack_id(token)
+    message = build_message(content, pr_url, login_to_slack_id)
+
+    try:
+        response = slack_post_json(
+            token,
+            "chat.postMessage",
+            {"channel": channel, "text": message},
+        )
+    except (OSError, ValueError) as exc:
+        fail("Slack chat.postMessage request failed: {}".format(exc))
+    if not response.get("ok"):
+        print("Slack chat.postMessage failed:", file=sys.stderr)
+        print(json.dumps(response, indent=2), file=sys.stderr)
+        sys.exit(1)
+
+    print("Posted reviewer summary to Slack channel {}.".format(channel))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/autogen-docs-notify.yml b/.github/workflows/autogen-docs-notify.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/autogen-docs-notify.yml
@@ -0,0 +1,211 @@
+name: Autogen Docs Slack Notify
+
+# Notifies the team's #documentation Slack channel when an
+# `autogen-docs` PR is flipped from draft to ready-for-review.
+#
+# These PRs are the version-bump docs PRs Renovate opens and the
+# Upstream Release Docs workflow augments: that workflow runs the
+# upstream-release-docs skill, assigns reviewers, and at the very
+# end flips the PR draft -> ready via `gh pr ready`. That flip emits
+# the `ready_for_review` event this workflow listens for.
+# `autogen-docs` is a LABEL Renovate applies (not a branch prefix),
+# and the PRs are authored by `renovate[bot]`, so the job gates on
+# both the label and the author below.
+#
+# SECURITY MODEL
+# --------------
+# This workflow triggers on `pull_request_target` (not
+# `pull_request`). `pull_request_target` runs the workflow definition
+# from the TRUSTED BASE branch rather than from the (potentially
+# attacker-controlled) PR head, while still exposing the PR metadata
+# on `github.event.pull_request`. The label + `renovate[bot]` author
+# guard below works identically on that payload. This is safe here
+# because the workflow NEVER checks out or executes any PR-supplied
+# code -- it only reads PR metadata via the `gh` CLI, and the one
+# checkout it performs fetches the poster script from the base branch.
+#
+# The work is split into two steps so the prompt-injectable Claude
+# session can never touch the Slack token or make arbitrary outbound
+# network calls:
+#   STEP 1 (Claude / claude-code-action): composes the message CONTENT
+#     ONLY and writes it to a JSON file. Its tools are limited to
+#     `Bash(gh pr view:*)` and `Write` -- there is NO curl, and the
+#     Slack token is NOT in this step's environment. The Bash grant
+#     is deliberately narrower than `Bash(gh:*)`: `gh alias set
+#     --shell` and `gh extension` would otherwise let a prompt
+#     injection run arbitrary commands. So even a prompt injection
+#     embedded in PR content cannot exfiltrate the Slack token (it is
+#     absent) nor reach an arbitrary host (no curl, no general Bash).
+#   STEP 2 (deterministic `run:` step): a small stdlib-only Python
+#     script (`.github/scripts/post_autogen_docs_slack.py`) reads that
+#     JSON, resolves reviewer GitHub handles -> Slack ids, and posts
+#     ONE message to Slack. Only this step holds `SLACK_BOT_TOKEN`,
+#     and it only ever contacts slack.com. The script is checked out
+#     from the base branch AFTER step 1, into a directory wiped first,
+#     so nothing step 1 wrote can run next to the Slack token.
+#
+# ---------------------------------------------------------------------
+# Before this can run:
+#   - Add a NEW repository secret `SLACK_BOT_TOKEN` (a Slack bot token,
+#     xoxb-...) with these scopes:
+#       * chat:write         - post the message
+#       * users:read         - list users to map GitHub handle -> Slack id
+#       * users.profile:read - read custom profile fields (the GitHub field)
+#     (`users:read.email` is NOT needed: the poster script never matches
+#     by email, so do not grant it.)
+#   - The Slack bot must be INVITED to the #documentation channel
+#     (channel id C06SZA9HBHU) or chat.postMessage will fail with
+#     `not_in_channel`.
+#   - Reviewer @-tagging in Slack depends on each reviewer's GitHub
+#     handle being present in their Slack profile (a "GitHub" custom
+#     profile field, pushed via Okta). When no match is found the
+#     message falls back to the plain GitHub @handle as text rather
+#     than guessing a Slack id.
+#
+# Channel ids are not secret, so the #documentation channel id is set
+# directly as an env var below (DOCS_SLACK_CHANNEL_ID), not as a secret.
+# ---------------------------------------------------------------------
+
+on:
+  pull_request_target:
+    types: [ready_for_review]
+
+permissions:
+  contents: read
+  pull-requests: read
+  # Required by anthropics/claude-code-action@v1 for OIDC token exchange.
+  id-token: write
+  # Required for Claude to read CI results / Actions context on PRs.
+  actions: read
+
+jobs:
+  notify:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    # Only run for the Renovate-authored docs PRs carrying the
+    # `autogen-docs` label. Human PRs that happen to go ready, and
+    # any other bot's PRs, are out of scope.
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'autogen-docs') &&
+      github.event.pull_request.user.login == 'renovate[bot]'
+    env:
+      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      MESSAGE_FILE: ${{ github.workspace }}/.autogen-docs-slack-message.json
+    steps:
+      # STEP 1 — Claude composes content only. Tools limited to
+      # `gh pr view` + Write. No Slack token in this step's env; no curl
+      # tool. So even a prompt injection from PR content cannot
+      # exfiltrate the Slack token (absent) nor make arbitrary network
+      # calls (no curl/Bash beyond `gh pr view`).
+      - name: Compose reviewer summary
+        uses: anthropics/claude-code-action@51705da45eecce209d4700538bf8377d5b5fc695 # v1.0.152
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          GH_REPO: ${{ github.repository }}
+          PR_URL: ${{ github.event.pull_request.html_url }}
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          additional_permissions: |
+            actions: read
+          # The triggering PR is authored by Renovate; without this,
+          # claude-code-action refuses to run for bot-initiated PRs as
+          # a safety default.
+          allowed_bots: 'renovate'
+          claude_args: |
+            --model claude-opus-4-7
+            --max-turns 30
+            --allowed-tools "Bash(gh pr view:*),Write"
+          prompt: |
+            You are running in GitHub Actions with no interactive user.
+            Follow these steps exactly and do NOT ask clarifying
+            questions -- proceed best-effort at every decision point.
+            Your ONLY job is to COMPOSE the CONTENT of a reviewer
+            summary about a documentation PR and WRITE it to a JSON
+            file, then exit. You do NOT post anything. You do NOT call
+            curl. You have no Slack token and no network access beyond
+            the `gh` CLI. A later, separate, deterministic step does the
+            actual Slack posting.
+
+            Context (the shell expands these environment variables inside
+            `gh` commands; values you need elsewhere are spelled out):
+              PR_NUMBER    - the PR number
+              GH_REPO      - owner/repo of this repository
+              PR_URL       - the PR's html_url, which is
+                             ${{ github.event.pull_request.html_url }}
+              MESSAGE_FILE - the path to write the JSON output file to, which is
+                             ${{ env.MESSAGE_FILE }}
+              GH_TOKEN     - auth for the `gh` CLI
+
+            STEP 1 — Read the PR.
+            Run:
+              gh pr view "$PR_NUMBER" --repo "$GH_REPO" --json title,body,reviewRequests,files,url
+            From the JSON, extract:
+              - the PR title
+              - the PR body. The body contains a marker-delimited
+                section written by the Upstream Release Docs workflow,
+                between an opening and a closing HTML comment marker
+                (`<!-- ... -->`). Inside it is an
+                "At a glance" table / summary. PREFER reusing that
+                summary's content for your bullets; do not pad beyond it.
+              - the list of changed files (for a fallback sense of scope
+                only — do not enumerate every file).
+              - the list of REQUESTED REVIEWERS (reviewRequests[].login
+                are GitHub logins).
+
+            STEP 2 — Write the message content as JSON.
+            Use the **Write** tool to write a file at the MESSAGE_FILE path
+            given above. The file MUST be valid JSON with
+            EXACTLY this shape:
+              {
+                "headline": "<one-line plain-text headline>",
+                "summary_bullets": ["...", "..."],
+                "reviewers": [
+                  {"login": "<github-login>", "note": "<what to check>"}
+                ],
+                "pr_url": "<value of PR_URL>"
+              }
+            Rules:
+              - "headline" is plain text only (NO Slack/markdown link
+                syntax, NO ids). The poster step turns it into a link.
+              - "summary_bullets" is a tight list of 2–4 strings,
+                reusing the PR's "At a glance" content. No padding, no
+                raw file lists, no internal ids.
+              - "reviewers" has one entry per REQUESTED reviewer. "login"
+                is the plain GitHub login (no "@"). "note" is the
+                specific thing that reviewer should check. When deciding
+                each note, keep this shared expectation in mind and let
+                it shape the notes: the goal is for everyone involved in
+                the release to review and approve within 2 business days;
+                a given reviewer's contribution to the release may not
+                have produced any user- or docs-facing changes, which is
+                expected and fine, and in that case their approval simply
+                confirms nothing was missed in the generated docs.
+              - "pr_url" is the PR_URL value given above.
+            Do NOT include any Slack ids, Slack mrkdwn, `<@...>` tags, or
+            `<url|text>` links — only the plain content above. Do NOT
+            post anything. Do NOT call curl. After writing the file,
+            print a brief confirmation and finish.
+      # STEP 2 — Deterministic Slack post. Only this step holds the Slack
+      # token, and it only contacts slack.com. Resolves reviewer GitHub
+      # handles -> Slack ids here (not in the Claude step).
+      #
+      # The poster script is fetched from the TRUSTED base branch (the
+      # default checkout ref on `pull_request_target`), AFTER the Claude
+      # step and into a directory that is wiped first, so nothing step 1
+      # wrote can be executed next to the Slack token.
+      - name: Clear trusted checkout directory
+        run: rm -rf "$GITHUB_WORKSPACE/.trusted-base"
+      - name: Check out poster script from base branch
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          path: .trusted-base
+          sparse-checkout: .github/scripts
+          persist-credentials: false
+      - name: Post summary to Slack
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          DOCS_SLACK_CHANNEL_ID: C06SZA9HBHU
+          PR_URL: ${{ github.event.pull_request.html_url }}
+        run: |
+          # -I: isolated mode, ignore PYTHON* env vars and user site dirs.
+          python3 -I .trusted-base/.github/scripts/post_autogen_docs_slack.py