From f72619cda7d88c84bae90037585019f67f34999a Mon Sep 17 00:00:00 2001 From: Hector Flores Date: Sat, 13 Jun 2026 13:27:38 -0500 Subject: [PATCH 1/2] feat: add 3 new error entries (runner-environment x1, permissions-auth x1, triggers x1) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- errors/permissions-auth/pa-124.yml | 130 +++++++++++++++++++++++++++ errors/runner-environment/re-505.yml | 116 ++++++++++++++++++++++++ errors/triggers/tr-120.yml | 120 +++++++++++++++++++++++++ 3 files changed, 366 insertions(+) create mode 100644 errors/permissions-auth/pa-124.yml create mode 100644 errors/runner-environment/re-505.yml create mode 100644 errors/triggers/tr-120.yml diff --git a/errors/permissions-auth/pa-124.yml b/errors/permissions-auth/pa-124.yml new file mode 100644 index 0000000..d22bbb3 --- /dev/null +++ b/errors/permissions-auth/pa-124.yml @@ -0,0 +1,130 @@ +id: permissions-auth-124 +title: 'OIDC sub Claim for pull_request.closed Does Not Match refs/heads/main IAM Trust Policy' +category: permissions-auth +severity: error +tags: + - oidc + - aws + - iam + - pull-request + - sub-claim + - AssumeRoleWithWebIdentity + - pull-request-closed +patterns: + - regex: 'Not authorized to perform sts:AssumeRoleWithWebIdentity' + flags: 'i' + - regex: 'error.*AssumeRoleWithWebIdentity.*not authorized' + flags: 'i' + - regex: 'Is not authorized to assume role.*pull.request' + flags: 'i' +error_messages: + - 'Not authorized to perform sts:AssumeRoleWithWebIdentity' + - 'Error: Not authorized to perform sts:AssumeRoleWithWebIdentity' + - 'Error: Assuming role failed: Not authorized to perform sts:AssumeRoleWithWebIdentity' +root_cause: | + When a workflow is triggered by a `pull_request` event (including `types: [closed]` used + for post-merge deployments), the OIDC `sub` (subject) claim in the GitHub-issued JWT is + **always** `repo:<owner>/<repo>:pull_request` — regardless of whether the PR was merged into + `main` or another branch.
+ + Developers commonly configure their AWS IAM trust policy with: + ``` + "token.actions.githubusercontent.com:sub": "repo:<owner>/<repo>:ref:refs/heads/main" + ``` + This condition matches `push` events on `main` but **never** matches a + `pull_request.closed` event, because the sub claim format is different for PR events. + + Result: the OIDC credential exchange fails with `Not authorized to perform + sts:AssumeRoleWithWebIdentity` even though the PR was successfully merged to main and the + intent is to deploy from main. + + GitHub OIDC sub claim formats by event type: + - `push` to main → `repo:<owner>/<repo>:ref:refs/heads/main` + - `pull_request` (any type) → `repo:<owner>/<repo>:pull_request` + - `workflow_dispatch` → `repo:<owner>/<repo>:ref:refs/heads/<branch>` + - `release` → `repo:<owner>/<repo>:ref:refs/tags/<tag>` +fix: | + Update the AWS IAM trust policy to also allow `pull_request` sub claims for the same + role, OR use a separate IAM role for PR-triggered deployments, OR switch the trigger to + `push` on main (which fires after merge and has the `refs/heads/main` sub claim). + + **Option 1 — Allow both sub patterns in the same role (StringLike array):** + Add `repo:<owner>/<repo>:pull_request` alongside the existing branch condition. + + **Option 2 — Switch trigger to push on main:** + Replace the `pull_request: [closed]` trigger with `push: branches: [main]`. The push + event fires after a merge and its sub claim will be `refs/heads/main`. + + **Option 3 — Custom sub claim (recommended for tight scoping):** + Enable custom OIDC sub claim configuration in GitHub repository/org settings to scope + the subject to `environment:<name>` or `job_workflow_ref` instead of the default event + sub, then update the IAM condition accordingly.
+fix_code: + - language: yaml + label: 'IAM trust policy — allow both push and pull_request sub claims' + code: | + # In AWS IAM Role trust policy JSON: + # "Condition": { + # "StringEquals": { + # "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" + # }, + # "StringLike": { + # "token.actions.githubusercontent.com:sub": [ + # "repo:my-org/my-repo:ref:refs/heads/main", + # "repo:my-org/my-repo:pull_request" + # ] + # } + # } + + # Workflow that uses pull_request.closed: + on: + pull_request: + branches: [main] + types: [closed] + + jobs: + deploy: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + steps: + - uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::123456789012:role/deploy-role + aws-region: us-east-1 + - language: yaml + label: 'Preferred — switch to push trigger to get refs/heads/main sub claim' + code: | + # push to main fires AFTER merge; sub claim is refs/heads/main + on: + push: + branches: [main] + + jobs: + deploy: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + steps: + - uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::123456789012:role/deploy-role + aws-region: us-east-1 +prevention: + - 'Prefer `push` on `main` over `pull_request.closed` for post-merge deployments — the sub claim is cleaner and matches the branch ref' + - 'Document which sub claim format each IAM role trusts so future changes remain consistent' + - 'Use `github.event.pull_request.merged == true` guard when using `pull_request.closed` trigger to avoid deploying on unmerged close' + - 'Test OIDC trust policy by printing the sub claim: add a step `run: echo $ACTIONS_ID_TOKEN_REQUEST_URL` and decode the token to verify sub format' + - 'Consider GitHub custom OIDC sub claims (repository settings → Actions → General) to bind trust to a specific environment rather than the event type' +docs: + - url: 
'https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect#understanding-the-oidc-token' + label: 'GitHub Docs — Understanding the OIDC token — sub claim format per event type' + - url: 'https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/configuring-openid-connect-in-amazon-web-services' + label: 'GitHub Docs — Configuring OIDC in AWS' + - url: 'https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect#customizing-the-subject-claims-for-an-organization-or-repository' + label: 'GitHub Docs — Customizing OIDC subject claims' + - url: 'https://stackoverflow.com/q/78746014' + label: 'Stack Overflow — Not authorized to perform sts:AssumeRoleWithWebIdentity during OIDC on PR merge' diff --git a/errors/runner-environment/re-505.yml b/errors/runner-environment/re-505.yml new file mode 100644 index 0000000..40cd4ee --- /dev/null +++ b/errors/runner-environment/re-505.yml @@ -0,0 +1,116 @@ +id: runner-environment-505 +title: 'GPG Signing Fails — "Inappropriate ioctl for device" (No TTY in Runner)' +category: runner-environment +severity: error +tags: + - gpg + - signing + - maven + - gradle + - pinentry + - non-interactive + - ossrh +patterns: + - regex: 'gpg: signing failed: Inappropriate ioctl for device' + flags: 'i' + - regex: 'gpg:\s+signing failed.*ioctl' + flags: 'i' + - regex: 'error:.*sign.*ioctl for device' + flags: 'i' +error_messages: + - 'gpg: signing failed: Inappropriate ioctl for device' + - 'gpg: [stdin]: sign+encrypt failed: Inappropriate ioctl for device' + - 'Error: Process completed with exit code 2.' +root_cause: | + GitHub Actions runners have no interactive terminal (TTY). 
The GPG agent's default + `pinentry` mode requires a real TTY to prompt the user for a passphrase — even when the + passphrase is provided via environment variable or Maven/Gradle settings. Without a TTY, + `pinentry` cannot open a terminal for input and raises `Inappropriate ioctl for device` + (errno `ENOTTY`). + + This affects any workflow that invokes GPG signing directly or through build tools + (Maven `maven-gpg-plugin`, Gradle signing plugin, `git commit --gpg-sign`, etc.). The + passphrase **is** available but GPG refuses to read it through the non-TTY path unless + explicitly told to use loopback mode. + + Common trigger scenarios: + - Publishing Java artifacts to Maven Central / Sonatype OSSRH via `maven-gpg-plugin` + - Signing releases with the Gradle signing plugin + - Creating signed git tags or commits inside a workflow + - Any `gpg --batch --sign` invocation without `--pinentry-mode loopback` +fix: | + Use `--pinentry-mode loopback` to tell GPG to read the passphrase from the calling + process rather than launching a pinentry dialog. This must be configured at both the + GPG-agent level and, for build tool plugins, in their plugin configuration. + + **For Maven `maven-gpg-plugin`:** add `<gpgArguments>` with `--pinentry-mode loopback`. + **For direct `gpg` commands:** pass `--batch --pinentry-mode loopback`. + **For git commit signing:** set `GPG_TTY` and configure `gpg.program` wrapper.
+fix_code: + - language: yaml + label: 'Maven GPG plugin — add pinentry-mode loopback in pom.xml' + code: | + # In pom.xml inside the maven-gpg-plugin <configuration>: + # <gpgArguments> + # <arg>--pinentry-mode</arg> + # <arg>loopback</arg> + # </gpgArguments> + + # Workflow step that triggers the signed deploy: + - name: Publish to Maven Central + run: mvn --no-transfer-progress --batch-mode clean deploy -P release -DskipTests + env: + MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + - language: yaml + label: 'Direct GPG signing — set pinentry-mode loopback via gpg.conf' + code: | + - name: Configure GPG for non-interactive signing + run: | + mkdir -p ~/.gnupg + chmod 700 ~/.gnupg + echo "use-agent" >> ~/.gnupg/gpg.conf + echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf + echo "allow-loopback-pinentry" >> ~/.gnupg/gpg-agent.conf + echo "RELOADAGENT" | gpg-connect-agent + shell: bash + + - name: Import GPG key + run: echo "${{ secrets.GPG_PRIVATE_KEY }}" | gpg --batch --import + env: + GNUPGHOME: ~/.gnupg + + - name: Sign file + run: gpg --batch --yes --passphrase "${{ secrets.GPG_PASSPHRASE }}" --pinentry-mode loopback --detach-sign file.tar.gz + - language: yaml + label: 'actions/setup-java with gpg-private-key — passphrase via env var' + code: | + - name: Set up Java with GPG + uses: actions/setup-java@v4 + with: + java-version: '21' + distribution: 'temurin' + gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} + gpg-passphrase: MAVEN_GPG_PASSPHRASE # env var name, not the value + + # In pom.xml maven-gpg-plugin <configuration> add: + # <gpgArguments><arg>--pinentry-mode</arg><arg>loopback</arg></gpgArguments> + + - name: Deploy + run: mvn --batch-mode deploy -P release + env: + MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} +prevention: + - 'Always add `--pinentry-mode loopback` when invoking GPG in a non-interactive environment' + - 'Add `pinentry-mode loopback` and `allow-loopback-pinentry` to `~/.gnupg/gpg.conf` and `gpg-agent.conf` respectively at the start of the job' + - 'For Maven: configure `<gpgArguments>` in the `maven-gpg-plugin` rather than passing flags on the command line' + -
'Import the GPG key in a dedicated step before any signing step; verify import with `gpg --list-secret-keys`' + - 'Test signing in CI with a throwaway key before production deployment' +docs: + - url: 'https://www.gnupg.org/documentation/manuals/gnupg/Agent-Configuration.html' + label: 'GnuPG Agent Configuration — allow-loopback-pinentry' + - url: 'https://maven.apache.org/plugins/maven-gpg-plugin/sign-mojo.html' + label: 'maven-gpg-plugin sign goal — gpgArguments parameter' + - url: 'https://stackoverflow.com/q/77703115' + label: 'Stack Overflow — gpg: signing failed: Inappropriate ioctl for device on GitHub Actions' + - url: 'https://docs.github.com/en/actions/use-cases-and-examples/publishing-packages/publishing-java-packages-with-maven' + label: 'GitHub Docs — Publishing Java packages with Maven' diff --git a/errors/triggers/tr-120.yml b/errors/triggers/tr-120.yml new file mode 100644 index 0000000..0e33ce5 --- /dev/null +++ b/errors/triggers/tr-120.yml @@ -0,0 +1,120 @@ +id: triggers-120 +title: 'workflow_run Downstream Trigger Continues Firing After Upstream Workflow File Is Deleted' +category: triggers +severity: known-unsolved +tags: + - workflow-run + - trigger + - deleted-workflow + - caching + - known-limitation + - phantom-trigger +patterns: + - regex: 'workflow_run.*trigger.*deleted|downstream.*workflow.*deleted.*upstream' + flags: 'i' + - regex: 'workflow.*no longer exists.*workflow_run' + flags: 'i' +error_messages: + - 'The workflow_run trigger fires even after the upstream workflow YAML file has been deleted' +root_cause: | + When a workflow file is deleted from the repository, GitHub does not immediately stop + resolving `workflow_run` triggers that reference it as the upstream workflow name. The + platform caches the workflow name→ID mapping and processes queued `completed` events for + runs that were already in flight at the time of deletion. 
Depending on queue depth and + processing lag, the downstream workflow can continue to fire for several minutes to a + few hours after the upstream file is removed. + + Additionally, if the upstream workflow name was renamed (the `name:` key changed without + renaming the file), `workflow_run` triggers that reference the old name string silently + stop matching — but may continue matching for in-flight runs. + + Observed patterns: + 1. **Post-delete phantom fires** — upstream YAML deleted, downstream fires 1–5+ more + times as previously-queued `completed` events drain. + 2. **Wrong commit SHA** — a `workflow_run` downstream workflow checks out the SHA from + `github.event.workflow_run.head_sha`, but that SHA may refer to a commit from another + branch that was merged earlier, not the latest commit on the triggering branch. + 3. **Infinite loop after rename** — upstream workflow is renamed; old `workflow_run` + triggers no longer match; developer creates a new downstream workflow referencing the + new name, but the old downstream workflow is never removed and fires on unrelated runs. + + There is no explicit GitHub API or UI mechanism to "flush" the pending `workflow_run` + event queue for a deleted workflow. +fix: | + Because this is a platform-level caching behavior with no user-controlled flush, the + mitigations focus on making the downstream workflow resilient: + + 1. **Guard with `if:` condition on workflow name** — check `github.event.workflow_run.name` + matches the exact expected upstream workflow name. This prevents phantom fires from + stale events after a rename. + + 2. **Guard with `if:` on conclusion** — most phantom fires complete the upstream run with + conclusion `success` or `failure`; filtering on conclusion prevents unwanted action. + + 3. **Add an `if:` condition on head_branch** — validate that + `github.event.workflow_run.head_branch` is the expected branch before proceeding. + + 4. 
**Wait out the cache** — after deleting or renaming the upstream workflow, wait + 10–30 minutes before relying on the downstream workflow being fully quiesced. + + 5. **Migrate to composite or reusable workflows** — avoids `workflow_run` trigger + entirely by making the downstream logic a reusable workflow called directly. +fix_code: + - language: yaml + label: 'Guard downstream workflow against phantom fires after upstream deletion/rename' + code: | + on: + workflow_run: + workflows: ["Frontend CI"] # exact name from upstream name: key + types: [completed] + branches: [main] + + jobs: + deploy: + # Explicit guards against phantom fires from stale cached events + if: | + github.event.workflow_run.conclusion == 'success' && + github.event.workflow_run.name == 'Frontend CI' && + github.event.workflow_run.head_branch == 'main' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.workflow_run.head_sha }} + - language: yaml + label: 'Migration — use reusable workflow instead of workflow_run to avoid trigger cache issues' + code: | + # In .github/workflows/frontend-ci.yml (upstream): + jobs: + test: + uses: ./.github/workflows/_deploy.yml + # OR: call the deploy job directly as a needs: dependency + # This removes the workflow_run trigger layer entirely. 
+ + # Downstream logic becomes a reusable workflow called by the upstream: + # .github/workflows/_deploy.yml (reusable): + on: + workflow_call: + inputs: + environment: + required: true + type: string + jobs: + deploy: + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + steps: + - run: echo "Deploying" +prevention: + - 'After deleting an upstream workflow file, wait 15–30 minutes before expecting the downstream `workflow_run` trigger to stop firing' + - 'Always include an `if:` guard on `github.event.workflow_run.name` in the downstream workflow to prevent misfire after renames' + - 'Pin `github.event.workflow_run.head_sha` explicitly in the `actions/checkout` `ref:` parameter to ensure the correct commit is checked out' + - 'Prefer reusable workflows (`workflow_call`) over `workflow_run` for tightly-coupled CI→deploy chains — it avoids the trigger cache entirely' + - 'When renaming an upstream workflow, update all `workflow_run` trigger references atomically in the same PR' +docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_run' + label: 'GitHub Docs — workflow_run event' + - url: 'https://docs.github.com/en/actions/sharing-automations/reusing-workflows' + label: 'GitHub Docs — Reusing workflows (workflow_call alternative)' + - url: 'https://stackoverflow.com/q/79941021' + label: 'Stack Overflow — workflow_run trigger keeps firing with old commit SHA after workflow file deleted' From df705ed59c90496535842dd2bcda24e20b274c44 Mon Sep 17 00:00:00 2001 From: Hector Flores <100806365+htekdev@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:32:58 -0500 Subject: [PATCH 2/2] fix: correct severity field in tr-120 (known-unsolved -> limitation) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- errors/triggers/tr-120.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/errors/triggers/tr-120.yml b/errors/triggers/tr-120.yml 
index 0e33ce5..a800b3b 100644 --- a/errors/triggers/tr-120.yml +++ b/errors/triggers/tr-120.yml @@ -1,7 +1,7 @@ id: triggers-120 title: 'workflow_run Downstream Trigger Continues Firing After Upstream Workflow File Is Deleted' category: triggers -severity: known-unsolved +severity: limitation tags: - workflow-run - trigger