From 96f0c4bcd3fdb74059d9d1f5cc00d7abefafba68 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:02:57 +0000
Subject: [PATCH 1/8] Replace chapter 8 with Waza eval lab

---
 .github/agents/token-eval-reviewer.agent.md   | 21 +++++++
 .../token-optimization.instructions.md        |  7 +++
 .github/prompts/context-triage.prompt.md      | 17 +++++
 .github/skills/token-optimization.skill.md    | 10 +++
 .github/workflows/waza-evals.yml              | 55 ++++++++++++++++
 evals/token-optimization/eval.yaml            | 25 ++++++++
 .../tasks/context-triage.yaml                 | 17 +++++
 labs/08-ai-evals-and-observability.md         | 62 ++++++++++++++-----
 labs/README.md                                |  2 +-
 skills/token-optimization/SKILL.md            | 29 +++++++++
 10 files changed, 228 insertions(+), 17 deletions(-)
 create mode 100644 .github/agents/token-eval-reviewer.agent.md
 create mode 100644 .github/instructions/token-optimization.instructions.md
 create mode 100644 .github/prompts/context-triage.prompt.md
 create mode 100644 .github/skills/token-optimization.skill.md
 create mode 100644 .github/workflows/waza-evals.yml
 create mode 100644 evals/token-optimization/eval.yaml
 create mode 100644 evals/token-optimization/tasks/context-triage.yaml
 create mode 100644 skills/token-optimization/SKILL.md

diff --git a/.github/agents/token-eval-reviewer.agent.md b/.github/agents/token-eval-reviewer.agent.md
new file mode 100644
index 0000000..eecede0
--- /dev/null
+++ b/.github/agents/token-eval-reviewer.agent.md
@@ -0,0 +1,21 @@
+---
+name: token-eval-reviewer
+description: Reviews Waza eval results and AI customization changes for quality, safety, and token efficiency.
+tools:
+  - codeSearch
+  - fileRead
+  - runCommand
+---
+
+# Token Eval Reviewer
+
+You review prompt, instruction, skill, and agent customizations.
+
+Focus on:
+
+- Whether the eval task measures a real customer workflow.
+- Whether fixtures are small, safe, and relevant.
+- Whether graders connect to correctness, groundedness, safety, developer experience, or token efficiency.
+- Whether workflow results are actionable for maintainers.
+
+Do not recommend adding proprietary third-party eval platforms for Chapter 8.
diff --git a/.github/instructions/token-optimization.instructions.md b/.github/instructions/token-optimization.instructions.md
new file mode 100644
index 0000000..e11b298
--- /dev/null
+++ b/.github/instructions/token-optimization.instructions.md
@@ -0,0 +1,7 @@
+# Token Optimization Instructions
+
+- Keep prompts, instructions, and agent definitions concise and testable.
+- Prefer repository facts and small fixtures over broad file inclusion.
+- Ask for clarification when the requested analysis lacks enough context.
+- Do not include secrets, production data, or private customer information in eval fixtures.
+- Tie every recommendation to quality, safety, cost, or developer experience.
diff --git a/.github/prompts/context-triage.prompt.md b/.github/prompts/context-triage.prompt.md
new file mode 100644
index 0000000..3e08015
--- /dev/null
+++ b/.github/prompts/context-triage.prompt.md
@@ -0,0 +1,17 @@
+---
+mode: ask
+description: Review a coding task for context quality and token efficiency.
+---
+
+# Context Triage
+
+Review the task, files, and instructions provided by the user.
+
+Identify:
+
+- Context that is necessary.
+- Context that is missing.
+- Context that can be removed.
+- Risks that require clarification before implementation.
+
+Return a concise recommendation before suggesting code changes.
diff --git a/.github/skills/token-optimization.skill.md b/.github/skills/token-optimization.skill.md
new file mode 100644
index 0000000..4950450
--- /dev/null
+++ b/.github/skills/token-optimization.skill.md
@@ -0,0 +1,10 @@
+---
+name: token-optimization
+description: Evaluate prompts, instructions, agents, and context selections for quality, safety, and token efficiency.
+---
+
+# Token Optimization Skill
+
+Use this skill when reviewing AI coding workflows for unnecessary context, unclear prompts, unsafe tool use, or inefficient model selection.
+
+Return findings that connect each recommendation to correctness, safety, developer experience, or token usage.
diff --git a/.github/workflows/waza-evals.yml b/.github/workflows/waza-evals.yml
new file mode 100644
index 0000000..92d7069
--- /dev/null
+++ b/.github/workflows/waza-evals.yml
@@ -0,0 +1,55 @@
+name: Waza Evals
+
+on:
+  pull_request:
+    paths:
+      - ".github/agents/**"
+      - ".github/instructions/**"
+      - ".github/prompts/**"
+      - ".github/skills/**"
+      - "evals/**"
+      - "skills/**"
+      - "labs/08-ai-evals-and-observability.md"
+  push:
+    branches: ["main"]
+    paths:
+      - ".github/agents/**"
+      - ".github/instructions/**"
+      - ".github/prompts/**"
+      - ".github/skills/**"
+      - "evals/**"
+      - "skills/**"
+      - "labs/08-ai-evals-and-observability.md"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  evaluate:
+    name: Run Waza eval suite
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Waza
+        run: curl -fsSL https://raw.githubusercontent.com/microsoft/waza/main/install.sh | bash
+
+      - name: Verify Waza
+        run: waza --version
+
+      - name: Run Waza
+        run: |
+          mkdir -p waza-results
+          waza run evals/token-optimization/eval.yaml \
+            --verbose \
+            --output waza-results/results.json \
+            --reporter junit:waza-results/results.xml
+
+      - name: Upload Waza results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: waza-results
+          path: waza-results/
diff --git a/evals/token-optimization/eval.yaml b/evals/token-optimization/eval.yaml
new file mode 100644
index 0000000..579a097
--- /dev/null
+++ b/evals/token-optimization/eval.yaml
@@ -0,0 +1,25 @@
+name: token-optimization-eval
+description: Evaluation suite for token optimization workshop customizations.
+skill: token-optimization
+version: "1.0"
+
+config:
+  executor: mock
+  model: mock-model
+  trials_per_task: 1
+  timeout_seconds: 300
+  parallel: false
+
+graders:
+  - type: text
+    name: mentions_context_quality
+    config:
+      regex_match:
+        - "(?i)(context|token|fixture|prompt|instruction)"
+  - type: behavior
+    name: bounded_tool_use
+    config:
+      max_tool_calls: 5
+
+tasks:
+  - "tasks/*.yaml"
diff --git a/evals/token-optimization/tasks/context-triage.yaml b/evals/token-optimization/tasks/context-triage.yaml
new file mode 100644
index 0000000..be14737
--- /dev/null
+++ b/evals/token-optimization/tasks/context-triage.yaml
@@ -0,0 +1,17 @@
+id: context-triage
+name: Context triage recommendation
+description: The assistant should identify unnecessary context and recommend a measurable prompt or instruction improvement.
+tags:
+  - context
+  - token-efficiency
+inputs:
+  prompt: |
+    Review a coding assistant prompt that includes the whole repository for a one-line documentation fix.
+    Recommend how to reduce context while preserving answer quality.
+expected:
+  output_contains:
+    - "context"
+    - "prompt"
+    - "quality"
+  behavior:
+    max_tool_calls: 5
diff --git a/labs/08-ai-evals-and-observability.md b/labs/08-ai-evals-and-observability.md
index 451d33c..8475a68 100644
--- a/labs/08-ai-evals-and-observability.md
+++ b/labs/08-ai-evals-and-observability.md
@@ -2,11 +2,26 @@
 
 ## Core idea
 
-Token optimization should be measured. AI evals help teams compare prompts, instructions, models, memories, and tool configurations using repeatable examples instead of anecdotes.
+Token optimization should be measured with repeatable, customer-owned evals. Use Microsoft Waza as the workshop evaluation framework so teams can compare prompts, instructions, agents, models, and tool configurations without relying on proprietary third-party products.
 
-## Candidate eval platform: W&B Weave
+## Required eval framework: Microsoft Waza
 
-Consider W&B Weave for tracing, prompt and model comparison, qualitative review, and lightweight observability. If a customer already uses another eval platform, keep the workshop tool-agnostic and focus on repeatable datasets, rubrics, and decision criteria.
+Use Waza for this chapter's lab and examples:
+
+- Documentation: <https://microsoft.github.io/waza/>
+- Repository: <https://github.com/microsoft/waza>
+- Typical commands: `waza run`, `waza check`, `waza compare`, `waza coverage`, and `waza tokens`.
+- CI integration: run Waza in GitHub Actions and publish the result files as workflow artifacts.
+
+This repository includes the minimum workspace artifacts Waza and VS Code customizations need:
+
+- `skills/token-optimization/SKILL.md` for the Waza skill under evaluation.
+- `.github/skills/token-optimization.skill.md` as a workspace skill customization.
+- `.github/prompts/context-triage.prompt.md` as a reusable prompt.
+- `.github/instructions/token-optimization.instructions.md` as shared instructions.
+- `.github/agents/token-eval-reviewer.agent.md` as a VS Code custom agent.
+- `evals/token-optimization/eval.yaml` and task files for Waza.
+- `.github/workflows/waza-evals.yml` to run the eval suite in Actions.
 
 ## What to evaluate
 
@@ -17,15 +32,17 @@ Consider W&B Weave for tracing, prompt and model comparison, qualitative review,
 - Safety: did it avoid secrets, unsafe commands, or policy violations?
 - Developer experience: was the answer actionable?
 
-## Recommended tools to consider
+## Run Waza in GitHub Actions
+
+The Actions workflow should:
 
-- W&B Weave: tracing, prompt/version comparison, human review workflows.
-- promptfoo: lightweight prompt and model regression testing.
-- LangSmith: tracing, datasets, and eval workflows for LangChain-based systems.
-- OpenAI Evals or provider-native eval tools: model and prompt comparison.
-- Azure AI Evaluation: useful for Azure-hosted AI workflows.
-- Ragas or DeepEval: evaluation patterns for retrieval-augmented generation.
-- Custom GitHub Actions or CI checks: simple regression suites for prompts and agent instructions.
+1. Check out the repository.
+2. Install Waza from the official Microsoft project.
+3. Run `waza run evals/token-optimization/eval.yaml --verbose`.
+4. Save JSON and JUnit results.
+5. Upload results as artifacts for review.
+
+Use the mock executor for quick pull request validation. Switch to a real executor only when the customer is ready to provide the required credentials and accept the cost and data-handling implications.
 
 ## Minimal eval dataset
 
@@ -39,11 +56,24 @@ Start with 10-20 examples:
 
 ## Hands-on lab
 
-1. Select three representative prompts.
-2. Run each with two instruction sets or two models.
-3. Score outputs from 1-5 on correctness, usefulness, and cost.
-4. Decide which change should become the new default.
+1. Open the repository's Waza workflow in `.github/workflows/waza-evals.yml`.
+2. Review the skill, prompt, instruction, and agent customization files.
+3. Run the Waza workflow from the Actions tab or by opening a pull request.
+4. Download the Waza result artifacts and identify which task failed or passed.
+5. Modify one prompt or instruction and rerun the workflow to compare results.
+6. Decide whether the customization improved correctness, safety, and token efficiency.
+
+## Run customer analyses in VS Code
+
+Use the Chat Customizations Evaluations extensions for VS Code to help customers run the same style of analysis locally:
+
+1. Install the VS Code extensions documented at <https://microsoft.github.io/waza/>.
+2. Open the customer's repository in VS Code.
+3. Add or review the workspace customizations: `*.skill.md`, `*.prompt.md`, `*.instructions.md`, and `*.agent.md`.
+4. Run local evaluations against representative prompts and fixtures.
+5. Compare local results with the GitHub Actions Waza results.
+6. Promote only the customizations that improve measured outcomes.
 
 ## Practical recommendation
 
-Use evals to justify changes to model routing, instruction files, MCP configuration, and memory strategy.
+Use Waza evals to justify changes to model routing, instruction files, prompt templates, agent definitions, MCP configuration, and memory strategy. Keep the eval data small, explicit, customer-owned, and safe to run in CI.
diff --git a/labs/README.md b/labs/README.md
index 78ea9ea..45150a5 100644
--- a/labs/README.md
+++ b/labs/README.md
@@ -22,7 +22,7 @@ Attendees will learn how to:
 - A GitHub account and access to an AI coding/chat tool.
 - A small sample repository with issues, tests, documentation, and a few realistic defects.
 - Optional: access to organization billing, Copilot usage, cloud AI usage, or model provider dashboards.
-- Optional: W&B Weave, LangSmith, promptfoo, OpenAI Evals, Azure AI Evaluation, or another eval/observability tool.
+- For the Chapter 8 eval lab: Microsoft Waza and the Chat Customizations Evaluations extensions for VS Code.
 
 ## Delivery formats
 
diff --git a/skills/token-optimization/SKILL.md b/skills/token-optimization/SKILL.md
new file mode 100644
index 0000000..421a5fd
--- /dev/null
+++ b/skills/token-optimization/SKILL.md
@@ -0,0 +1,29 @@
+---
+name: token-optimization
+type: analysis
+description: |
+  USE FOR: Evaluating prompts, instructions, agents, and context choices for token-efficient software engineering workflows.
+  DO NOT USE FOR: Replacing security review, exposing secrets, or analyzing customer data without permission.
+license: MIT
+metadata:
+  version: "1.0"
+---
+
+# Token Optimization
+
+## Overview
+
+Help teams evaluate whether their AI coding customizations improve quality, safety, and token efficiency.
+
+## Triggers
+
+- "Evaluate this prompt for token efficiency."
+- "Compare these instructions for quality and cost."
+- "Review this agent customization before we make it the default."
+
+## Expectations
+
+- Prefer customer-owned eval data.
+- Keep fixtures small and relevant.
+- Score correctness, groundedness, safety, developer experience, and token efficiency.
+- Recommend measurable changes rather than subjective preferences.

From 4612845e257c1169e73a56a4e15fc2bb4e2c203d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:04:16 +0000
Subject: [PATCH 2/8] Harden Waza workflow install step

---
 .github/workflows/waza-evals.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/waza-evals.yml b/.github/workflows/waza-evals.yml
index 92d7069..0359775 100644
--- a/.github/workflows/waza-evals.yml
+++ b/.github/workflows/waza-evals.yml
@@ -34,7 +34,15 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Install Waza
-        run: curl -fsSL https://raw.githubusercontent.com/microsoft/waza/main/install.sh | bash
+        env:
+          WAZA_INSTALL_COMMIT: bf77c759d6bee3ba578c1980fa8a176fe8d014e4
+          WAZA_INSTALL_SHA256: f9f949c8ef7ed4e3309cee3f1285d2f7a7dea6db1c965ceab09443d3c7910d7a
+        run: |
+          curl -fsSL \
+            "https://raw.githubusercontent.com/microsoft/waza/${WAZA_INSTALL_COMMIT}/install.sh" \
+            -o install-waza.sh
+          echo "${WAZA_INSTALL_SHA256}  install-waza.sh" | sha256sum -c -
+          bash install-waza.sh
 
       - name: Verify Waza
         run: waza --version

From 852d8b155d221839b7e83ff05abd9a1a7bdddb3f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:05:10 +0000
Subject: [PATCH 3/8] Address Waza validation review feedback

---
 .github/workflows/waza-evals.yml      | 1 +
 evals/token-optimization/eval.yaml    | 2 ++
 labs/08-ai-evals-and-observability.md | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/waza-evals.yml b/.github/workflows/waza-evals.yml
index 0359775..8a5486b 100644
--- a/.github/workflows/waza-evals.yml
+++ b/.github/workflows/waza-evals.yml
@@ -35,6 +35,7 @@ jobs:
 
       - name: Install Waza
         env:
+          # Pinned to a reviewed Microsoft Waza install script commit; update the commit and SHA-256 together.
           WAZA_INSTALL_COMMIT: bf77c759d6bee3ba578c1980fa8a176fe8d014e4
           WAZA_INSTALL_SHA256: f9f949c8ef7ed4e3309cee3f1285d2f7a7dea6db1c965ceab09443d3c7910d7a
         run: |
diff --git a/evals/token-optimization/eval.yaml b/evals/token-optimization/eval.yaml
index 579a097..2e8fcc2 100644
--- a/evals/token-optimization/eval.yaml
+++ b/evals/token-optimization/eval.yaml
@@ -4,6 +4,8 @@ skill: token-optimization
 version: "1.0"
 
 config:
+  # Use mock for pull request validation. For real model runs, change this to
+  # the executor and credential setup documented at https://microsoft.github.io/waza/.
   executor: mock
   model: mock-model
   trials_per_task: 1
diff --git a/labs/08-ai-evals-and-observability.md b/labs/08-ai-evals-and-observability.md
index 8475a68..8bb5e84 100644
--- a/labs/08-ai-evals-and-observability.md
+++ b/labs/08-ai-evals-and-observability.md
@@ -76,4 +76,4 @@ Use the Chat Customizations Evaluations extensions for VS Code to help customers
 
 ## Practical recommendation
 
-Use Waza evals to justify changes to model routing, instruction files, prompt templates, agent definitions, MCP configuration, and memory strategy. Keep the eval data small, explicit, customer-owned, and safe to run in CI.
+Use Waza evals to justify changes to model routing, instruction files, prompt templates, agent definitions, Model Context Protocol (MCP) configuration, and memory strategy. Keep the eval data small, explicit, customer-owned, and safe to run in CI.

From 0ff9b9bf62476b293003d9bb1506a4e04e7564db Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:06:06 +0000
Subject: [PATCH 4/8] Note Waza documentation availability check

---
 labs/08-ai-evals-and-observability.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/labs/08-ai-evals-and-observability.md b/labs/08-ai-evals-and-observability.md
index 8bb5e84..66bc4bf 100644
--- a/labs/08-ai-evals-and-observability.md
+++ b/labs/08-ai-evals-and-observability.md
@@ -13,6 +13,8 @@ Use Waza for this chapter's lab and examples:
 - Typical commands: `waza run`, `waza check`, `waza compare`, `waza coverage`, and `waza tokens`.
 - CI integration: run Waza in GitHub Actions and publish the result files as workflow artifacts.
 
+Before delivery, confirm attendees can access the Waza documentation and repository from their network.
+
 This repository includes the minimum workspace artifacts Waza and VS Code customizations need:
 
 - `skills/token-optimization/SKILL.md` for the Waza skill under evaluation.

From 06a1fd919609642d758e7c060b0e8b1e7f38ef68 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:06:55 +0000
Subject: [PATCH 5/8] Align Waza workflow file naming

---
 .github/workflows/{waza-evals.yml => waza-eval-suite.yml} | 0
 labs/08-ai-evals-and-observability.md                     | 4 ++--
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename .github/workflows/{waza-evals.yml => waza-eval-suite.yml} (100%)

diff --git a/.github/workflows/waza-evals.yml b/.github/workflows/waza-eval-suite.yml
similarity index 100%
rename from .github/workflows/waza-evals.yml
rename to .github/workflows/waza-eval-suite.yml
diff --git a/labs/08-ai-evals-and-observability.md b/labs/08-ai-evals-and-observability.md
index 66bc4bf..bd2aaf4 100644
--- a/labs/08-ai-evals-and-observability.md
+++ b/labs/08-ai-evals-and-observability.md
@@ -23,7 +23,7 @@ This repository includes the minimum workspace artifacts Waza and VS Code custom
 - `.github/instructions/token-optimization.instructions.md` as shared instructions.
 - `.github/agents/token-eval-reviewer.agent.md` as a VS Code custom agent.
 - `evals/token-optimization/eval.yaml` and task files for Waza.
-- `.github/workflows/waza-evals.yml` to run the eval suite in Actions.
+- `.github/workflows/waza-eval-suite.yml` to run the eval suite in Actions.
 
 ## What to evaluate
 
@@ -58,7 +58,7 @@ Start with 10-20 examples:
 
 ## Hands-on lab
 
-1. Open the repository's Waza workflow in `.github/workflows/waza-evals.yml`.
+1. Open the repository's Waza workflow in `.github/workflows/waza-eval-suite.yml`.
 2. Review the skill, prompt, instruction, and agent customization files.
 3. Run the Waza workflow from the Actions tab or by opening a pull request.
 4. Download the Waza result artifacts and identify which task failed or passed.

From 5d612aecee3bacf734dd093a36f002a39f452325 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:07:47 +0000
Subject: [PATCH 6/8] Clarify VS Code agent customization wording

---
 labs/08-ai-evals-and-observability.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/08-ai-evals-and-observability.md b/labs/08-ai-evals-and-observability.md
index bd2aaf4..c03927e 100644
--- a/labs/08-ai-evals-and-observability.md
+++ b/labs/08-ai-evals-and-observability.md
@@ -21,7 +21,7 @@ This repository includes the minimum workspace artifacts Waza and VS Code custom
 - `.github/skills/token-optimization.skill.md` as a workspace skill customization.
 - `.github/prompts/context-triage.prompt.md` as a reusable prompt.
 - `.github/instructions/token-optimization.instructions.md` as shared instructions.
-- `.github/agents/token-eval-reviewer.agent.md` as a VS Code custom agent.
+- `.github/agents/token-eval-reviewer.agent.md` as a VS Code agent customization.
 - `evals/token-optimization/eval.yaml` and task files for Waza.
 - `.github/workflows/waza-eval-suite.yml` to run the eval suite in Actions.
 

From e569580b0db7d45cc66a53d9cfb3d64d64a79bd8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:08:32 +0000
Subject: [PATCH 7/8] Expand Waza workflow validation comments

---
 .github/workflows/waza-eval-suite.yml | 1 +
 evals/token-optimization/eval.yaml    | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/waza-eval-suite.yml b/.github/workflows/waza-eval-suite.yml
index 8a5486b..42d3251 100644
--- a/.github/workflows/waza-eval-suite.yml
+++ b/.github/workflows/waza-eval-suite.yml
@@ -36,6 +36,7 @@ jobs:
       - name: Install Waza
         env:
           # Pinned to a reviewed Microsoft Waza install script commit; update the commit and SHA-256 together.
+          # SHA-256 verified for this exact install.sh content on 2026-05-12.
           WAZA_INSTALL_COMMIT: bf77c759d6bee3ba578c1980fa8a176fe8d014e4
           WAZA_INSTALL_SHA256: f9f949c8ef7ed4e3309cee3f1285d2f7a7dea6db1c965ceab09443d3c7910d7a
         run: |
diff --git a/evals/token-optimization/eval.yaml b/evals/token-optimization/eval.yaml
index 2e8fcc2..f1722b5 100644
--- a/evals/token-optimization/eval.yaml
+++ b/evals/token-optimization/eval.yaml
@@ -5,7 +5,8 @@ version: "1.0"
 
 config:
   # Use mock for pull request validation. For real model runs, change this to
-  # the executor and credential setup documented at https://microsoft.github.io/waza/.
+  # a supported Waza executor, configure the required credentials such as
+  # GITHUB_TOKEN in Actions, and follow https://microsoft.github.io/waza/.
   executor: mock
   model: mock-model
   trials_per_task: 1

From 62a0ff78b8bd9a1dc95031589a5bc3f2f0734f5b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 21:09:16 +0000
Subject: [PATCH 8/8] Document pinned Waza install commit selection

---
 .github/workflows/waza-eval-suite.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/waza-eval-suite.yml b/.github/workflows/waza-eval-suite.yml
index 42d3251..6081304 100644
--- a/.github/workflows/waza-eval-suite.yml
+++ b/.github/workflows/waza-eval-suite.yml
@@ -35,7 +35,8 @@ jobs:
 
       - name: Install Waza
         env:
-          # Pinned to a reviewed Microsoft Waza install script commit; update the commit and SHA-256 together.
+          # Pinned to a Microsoft Waza repository snapshot selected from upstream docs on 2026-05-12.
+          # If this commit becomes unavailable, switch to a reviewed upstream commit and update the SHA-256 to match.
           # SHA-256 verified for this exact install.sh content on 2026-05-12.
           WAZA_INSTALL_COMMIT: bf77c759d6bee3ba578c1980fa8a176fe8d014e4
           WAZA_INSTALL_SHA256: f9f949c8ef7ed4e3309cee3f1285d2f7a7dea6db1c965ceab09443d3c7910d7a