From 3aad53c56376bf89951843933dcd6e515b5ea295 Mon Sep 17 00:00:00 2001 From: Shahzaib Date: Wed, 25 Mar 2026 11:33:53 -0700 Subject: [PATCH 1/5] Add copilot review analyst skill --- .github/copilot-instructions.md | 1 + .../skills/copilot-review-analyst/SKILL.md | 195 ++++++++ ...e-Review-Effectiveness-Report-Outlook.html | 432 +++++++++++++++++ ...ilot-Code-Review-Effectiveness-Report.html | 411 ++++++++++++++++ ...opilot-Code-Review-Effectiveness-Report.md | 270 +++++++++++ .../references/account-map.json | 22 + .../references/classification-rules.md | 133 ++++++ .../references/manual-audit-template.json | 17 + .../references/report-formatting.md | 175 +++++++ .../scripts/analyze.ps1 | 347 ++++++++++++++ .../scripts/final-classification.ps1 | 376 +++++++++++++++ .../scripts/precise.ps1 | 450 ++++++++++++++++++ 12 files changed, 2829 insertions(+) create mode 100644 .github/skills/copilot-review-analyst/SKILL.md create mode 100644 .github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report-Outlook.html create mode 100644 .github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.html create mode 100644 .github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.md create mode 100644 .github/skills/copilot-review-analyst/references/account-map.json create mode 100644 .github/skills/copilot-review-analyst/references/classification-rules.md create mode 100644 .github/skills/copilot-review-analyst/references/manual-audit-template.json create mode 100644 .github/skills/copilot-review-analyst/references/report-formatting.md create mode 100644 .github/skills/copilot-review-analyst/scripts/analyze.ps1 create mode 100644 .github/skills/copilot-review-analyst/scripts/final-classification.ps1 create mode 100644 .github/skills/copilot-review-analyst/scripts/precise.ps1 diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 678689e4..8bd49e68 100644 --- 
a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -130,6 +130,7 @@ For complex investigation tasks, use these skills (read the skill file for detai | **design-reviewer** | `.github/skills/design-reviewer/SKILL.md` | "address review comments", "handle my review", "review comments on" | | **pbi-dispatcher** | `.github/skills/pbi-dispatcher/SKILL.md` | "dispatch PBIs to agent", "assign to Copilot", "send work items to coding agent" | | **test-planner** | `.github/skills/test-planner/SKILL.md` | "create test plan", "write test cases", "add tests to ADO", "export test plan", "E2E tests for" | +| **copilot-review-analyst** | `.github/skills/copilot-review-analyst/SKILL.md` | "analyze Copilot reviews", "Copilot review effectiveness", "review analysis report", "how helpful are Copilot reviews" | ## 13. Azure DevOps Integration diff --git a/.github/skills/copilot-review-analyst/SKILL.md b/.github/skills/copilot-review-analyst/SKILL.md new file mode 100644 index 00000000..c2935799 --- /dev/null +++ b/.github/skills/copilot-review-analyst/SKILL.md @@ -0,0 +1,195 @@ +--- +name: copilot-review-analyst +description: Analyze GitHub Copilot code review effectiveness across Android Auth repositories. Collects all Copilot inline review comments via GitHub API, classifies them as helpful/not-helpful/unresolved through reply analysis, diff verification, and AI-assisted classification, then generates a report with per-repo and per-engineer statistics. Use this skill when asked to "analyze Copilot reviews", "measure Copilot review effectiveness", "generate Copilot review report", "how helpful are Copilot reviews", "run review analysis", or any request to measure/report on GitHub Copilot code review quality and adoption. +--- + +# Copilot Review Analyst + +Analyze GitHub Copilot code review effectiveness across the Android Auth repositories by collecting all inline review comments, classifying each one, and producing a comprehensive report. 
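The core classification idea can be sketched in a few lines. The sketch below is an illustrative Python reduction of the Phase 1 keyword matching performed by `scripts/analyze.ps1`; the keyword lists are examples taken from the report's quoted replies, not the script's exact lists:

```python
# Illustrative sketch only: the real classifier is the PowerShell in
# scripts/analyze.ps1, and its keyword lists may differ from these examples.
POSITIVE = ("good catch", "fixed", "addressed", "added unit test", "@copilot")
NEGATIVE = ("won't fix", "this is fine", "not applicable", "incorrect")

def classify_reply(reply):
    """Map a human reply (or None) to a Phase 1 bucket."""
    if not reply:
        return "no-response"
    text = reply.lower()
    positive = any(k in text for k in POSITIVE)
    negative = any(k in text for k in NEGATIVE)
    if positive and negative:
        return "mixed-response"
    if positive:
        return "helpful-acknowledged"
    if negative:
        return "unhelpful-dismissed"
    return "replied-unclear"
```

Anything that matches neither list falls into `replied-unclear` and is carried forward to the manual audit in Phase 3.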
+ +## Prerequisites + +- **GitHub CLI (`gh`)** authenticated with access to all target repos +- For public repos (common, msal): personal GitHub account +- For private repos (broker): EMU account (e.g. `shjameel_microsoft`) +- **Output directory**: `~/.copilot-review-analysis/` for final artifacts, `$env:TEMP\copilot-review-analysis\` for intermediate data + +## Repository Configuration + +Default repos (update in scripts if changed): + +| Label | Slug | Auth | +|-------|------|------| +| common | `AzureAD/microsoft-authentication-library-common-for-android` | Personal | +| msal | `AzureAD/microsoft-authentication-library-for-android` | Personal | +| broker | `identity-authnz-teams/ad-accounts-for-android` | EMU | + +## Analysis Pipeline + +The analysis runs in 5 sequential phases. Scripts and templates are bundled in this skill: +- **Scripts:** `scripts/` (3 core pipeline scripts) +- **Assets:** `assets/` (report templates — Markdown, HTML, Outlook HTML) +- **References:** `references/` (classification rules, report formatting guide) + +### Phase 1: Data Collection + Keyword Classification + +**Script:** `scripts/analyze.ps1` + +Run to collect all Copilot inline review comments from human-authored PRs: + +```powershell +# Default: last 60 days +.\.github\skills\copilot-review-analyst\scripts\analyze.ps1 + +# Custom date range: +.\.github\skills\copilot-review-analyst\scripts\analyze.ps1 -StartDate "2026-01-23" +``` + +**Parameters:** +- `-StartDate` — Start date for PR search (default: 60 days ago). Format: `YYYY-MM-DD` +- `-OutputDir` — Output directory (default: `$env:TEMP\copilot-review-analysis`) + +What it does: +1. Fetch all PRs created after `-StartDate` via `gh pr list` +2. Filter out bot-authored PRs (Copilot, copilot-swe-agent, dependabot, github-actions) +3. For each PR, call `repos/{slug}/pulls/{prNum}/comments` to get inline comments +4. Filter to Copilot comments (user.login = "Copilot") that are top-level (not replies) +5. 
Find human replies to each Copilot comment (matched via `in_reply_to_id`) +6. Classify replies via keyword matching into: `helpful-acknowledged`, `unhelpful-dismissed`, `mixed-response`, `replied-unclear`, `no-response` + +**Outputs:** +- `$env:TEMP\copilot-review-analysis\raw_results.json` — all comments with initial classification +- `$env:TEMP\copilot-review-analysis\review_summaries.json` — PR-level summary comments (for reference, not classified) + +### Phase 2: Diff-Level Verification + +**Script:** `scripts/precise.ps1` + +For every `no-response` comment, verify whether the engineer silently acted on the feedback: + +```powershell +.\.github\skills\copilot-review-analyst\scripts\precise.ps1 +``` + +What it does: +1. Load `raw_results.json`, filter to `no-response` comments +2. For each comment, get the commit SHA it was left on and the PR head SHA +3. Use `repos/{slug}/compare/{commitA}...{commitB}` to get the diff +4. For **suggestion blocks**: extract code tokens, check if they appear as `+` lines in the diff +5. For **prose comments**: check if diff hunk line ranges overlap the comment's line range (±5 line tolerance) + +**Verdicts assigned:** +- `suggestion-applied` — suggestion tokens match diff + lines overlap +- `suggestion-likely-applied` — tokens match but lines don't overlap +- `exact-lines-modified` — prose comment's lines were modified +- `lines-modified-different-fix` — nearby lines modified, different code +- `file-changed-elsewhere` — file modified but at different lines +- `file-changed-no-line-info` — file modified but comment had no line number +- `file-not-changed` — file untouched after the comment +- `no-subsequent-commits` — PR merged without any commits after the review + +**Output:** `$env:TEMP\copilot-review-analysis\precise.json` + +### Phase 3: AI-Assisted Reply Classification + +This phase is **manual** — performed by the agent (you) in conversation. 
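The first step of this manual pass is mechanical and can be sketched as below. The field names (`Classification`, `HumanReplyText`) are assumptions mirroring the Phase 1 output; check `raw_results.json` for the actual schema:

```python
# Illustrative sketch: select the comments that need a human/agent verdict.
# The field name "Classification" is an assumption about the Phase 1 schema.
def ambiguous_comments(raw_results):
    """Return the comments classified as replied-unclear in Phase 1."""
    return [c for c in raw_results
            if c.get("Classification") == "replied-unclear"]

# Works the same whether the list comes from json.load() or memory:
sample = [
    {"Classification": "helpful-acknowledged", "HumanReplyText": "Fixed."},
    {"Classification": "replied-unclear", "HumanReplyText": "Hmm, maybe."},
]
pending = ambiguous_comments(sample)  # one comment left to judge by hand
```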
Read the `replied-unclear` comments from `raw_results.json` and classify each one based on the reply text. + +See [references/classification-rules.md](references/classification-rules.md) for the full classification hierarchy and patterns. + +**Process:** +1. Load `raw_results.json` and filter to `Classification -eq "replied-unclear"` +2. For each comment, read the `HumanReplyText` and `CommentBody` +3. Apply the classification cascade from the rules reference +4. Group results: which are helpful (acknowledged action), which are not helpful (explained away / dismissed), which are genuinely ambiguous +5. For genuinely ambiguous ones, apply domain context judgment +6. Also review `file-changed-elsewhere` and `file-changed-no-line-info` verdicts from Phase 2 to identify re-audit flips + +**Output:** Write results to `$env:TEMP\copilot-review-analysis\manual-audit.json` using the template at `references/manual-audit-template.json`. This file is consumed by Phase 4. + +### Phase 4: Final Classification + +**Script:** `scripts/final-classification.ps1` + +Merge all results into a single authoritative dataset: + +```powershell +.\.github\skills\copilot-review-analyst\scripts\final-classification.ps1 ` + -AccountMapFile ".github\skills\copilot-review-analyst\references\account-map.json" ` + -ManualAuditFile "$env:TEMP\copilot-review-analysis\manual-audit.json" +``` + +**Parameters:** +- `-OutputDir` — Directory with `raw_results.json` and `precise.json` (default: `$env:TEMP\copilot-review-analysis`) +- `-AccountMapFile` — Path to JSON mapping GitHub logins to display names. See `references/account-map.json` for the current team. If omitted, raw GitHub logins are used. +- `-ManualAuditFile` — Path to JSON with Phase 3 manual audit decisions. See `references/manual-audit-template.json` for the schema. If omitted, all ambiguous comments default to "not-helpful". + +What it does: +1. Load `raw_results.json` and `precise.json` +2. 
Load account mapping and manual audit decisions from external JSON files +3. Classify every comment using the full hierarchy: + - Replied + positive → helpful + - Replied + negative → not-helpful + - Replied + delegated (@copilot) → helpful + - Replied + acknowledged action → helpful + - Replied + explained-away → not-helpful + - Replied + genuinely unclear → check manual audit file, else not-helpful + - No response + suggestion-applied/exact-lines-modified → helpful + - No response + file-changed-elsewhere → check re-audit list from manual audit file + - No response + file-not-changed/no-subsequent-commits → not-helpful (**note: conservative — see Key Principle below**) +4. Produce per-engineer and per-repo statistics + +**Output:** `$env:TEMP\copilot-review-analysis\final_classification.json` + +### Phase 5: Report Generation + +Generate both Markdown and Outlook-compatible HTML reports. + +**Style/structure references** (in `assets/` — these contain data from the Jan-Mar 2026 analysis and serve as structural templates, NOT to be copied verbatim): +- `assets/Copilot-Code-Review-Effectiveness-Report.md` — Markdown reference +- `assets/Copilot-Code-Review-Effectiveness-Report-Outlook.html` — Outlook HTML reference +- `assets/Copilot-Code-Review-Effectiveness-Report.html` — Standard HTML reference + +**Important:** The asset templates contain hardcoded numbers (557 comments, specific percentages, engineer names, etc.) from the first analysis. For each new run, generate fresh reports using the same section structure and formatting patterns but with statistics computed from `final_classification.json`. + +**Generate two versions of each report:** +1. **Team-internal** — uses real engineer names (from account map). For the team. +2. **Org-wide** — anonymizes engineers as "Engineer A", "Engineer B", etc., sorted by helpfulness descending. For sharing outside the team. + +**Process:** +1. Load `final_classification.json` +2. 
Compute aggregate statistics (total, per-repo, per-engineer) +3. Generate reports using the section structure and formatting from the asset templates +4. Collect notable examples for "What Copilot Is Good At" and "What Copilot Struggles With" +5. Save to `~/.copilot-review-analysis/`: + - `Copilot-Code-Review-Effectiveness-Report.md` (team, real names) + - `Copilot-Code-Review-Effectiveness-Report-Anonymous.md` (org-wide) + - `Copilot-Code-Review-Effectiveness-Report-Outlook.html` (team, real names) + - `Copilot-Code-Review-Effectiveness-Report-Outlook-Anonymous.html` (org-wide) + +See [references/report-formatting.md](references/report-formatting.md) for the report structure and Outlook HTML formatting rules. + +## Key Principle: "Unresolved" ≠ "Not Helpful" + +Comments with no reply and no diff evidence are **Unresolved**, not assumed unhelpful. This is a critical distinction: +- **Confirmed Helpful** = positive evidence (explicit acknowledgment OR verified fix in diff) +- **Confirmed Not Helpful** = positive evidence (explicit dismissal with stated reason OR comment on stale code) +- **Unresolved** = insufficient evidence either way (engineer never engaged) + +The `final-classification.ps1` script classifies no-response/no-diff-evidence comments as "not-helpful" for conservative stats, but the report should present the three-way breakdown to be honest about uncertainty. + +## Copilot Comment Identification + +- Copilot inline review comments use `user.login = "Copilot"` +- Legacy bot: `copilot-pull-request-reviewer[bot]` +- Bot PR authors to exclude: `app/copilot-swe-agent`, `dependabot[bot]`, `github-actions[bot]` +- Only count top-level comments (`in_reply_to_id` is null/0), not Copilot's own replies + +## Rate Limiting + +The scripts call the GitHub API heavily. 
Built-in mitigations: +- PR comment caching (fetched once per PR) +- Diff caching (fetched once per commit range) +- Sleep every 15 PRs (300ms) and every 25 diff checks (200ms) +- Use `--paginate` for repos with many comments + +If hitting rate limits, increase sleep intervals or use `GH_TOKEN` with higher rate limits. diff --git a/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report-Outlook.html b/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report-Outlook.html new file mode 100644 index 00000000..4963fed1 --- /dev/null +++ b/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report-Outlook.html @@ -0,0 +1,432 @@ + + + + + + +Copilot Code Review Effectiveness Analysis + +
+ +

Copilot Code Review Effectiveness Analysis

+

Android Auth Platform  |  January 23 – March 23, 2026  |  Common, MSAL, Broker Repositories

+ + +

 

Background
+ +

The Android Auth Platform team builds and maintains the authentication libraries used by Microsoft's mobile apps — including MSAL (client-side), Common (shared IPC and utilities), and Broker (the brokered authentication service running on Android devices). Our codebase spans three repositories with 10 active contributors.

+ +

Earlier this year, we enabled GitHub Copilot code reviews across all three repositories. We also added custom review instructions via copilot-instructions.md to give the AI context about our architecture, coding conventions, and multi-repo structure — aiming to make its feedback more relevant than generic suggestions.

+ +

This report answers the question: Is Copilot code review actually useful for our team? We looked at every comment Copilot left over 2 months and determined — through reply analysis, commit diff verification, and AI-assisted classification of ambiguous cases — whether each piece of feedback led to a real code improvement, was dismissed as irrelevant, or was simply never evaluated by the engineer.

+ + +

 

At a Glance
+ +

We analyzed every inline code review comment left by GitHub Copilot on human-authored pull requests across our three Android Auth repositories over the past two months. For each of the 557 comments, we determined whether the feedback led to a concrete code improvement.

+ + + + + + + + + +
+ + +
+
57%
+
of comments received
no response
+
+
+ + +
+
41%
+
confirmed
helpful
+
+
+ + +
+
18%
+
confirmed
not helpful
+
+
+ + +
+
41%
+
unresolved
(inconclusive)
+
+
+ + + + + + +
+The biggest finding isn't about AI quality — it's about adoption. +Of the 557 comments Copilot left, only 239 (43%) received any response from an engineer — the other 318 were never acknowledged. Of those 239 responded-to comments, 144 (60%) led to confirmed code improvements. That's a strong signal: when engineers read and evaluate Copilot's feedback, the majority of it is useful. The challenge is that 57% of comments are never evaluated at all, leaving 226 (41%) in an unresolved state where we can't determine whether they were helpful or not. +
+ +

Scope

+ + + + + + + + +
+ + +
+
163
+
Human PRs
scanned
+
+
+ + +
+
113
+
PRs received
Copilot review (69%)
+
+
+ + +
+
557
+
Inline review
comments
+
+
+ + +
+
4.9
+
Avg comments
per reviewed PR
+
+
+ + +

 

Overall Results
+ +

Engineer Response Rate

+

Before looking at helpfulness, it's important to understand how engineers interact with Copilot reviews — because a comment can only demonstrate value if someone reads it.

+ + + + + + + +
43% replied57% ignored
+ + + + + + + +
 
Engineer replied (239)
 
No response (318)
+ +

More than half of all Copilot review comments receive no human response. The majority of AI feedback enters a void — it may be valid, but we can never confirm its value if no one engages with it.

+ +

Helpfulness Verdict

+ + + + + + + + +
41.3% Helpful18.1%40.6% Unresolved
+ + + + + + + + + +
 
Confirmed Helpful (230)
 
Confirmed Not Helpful (101)
 
Unresolved (226)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
VerdictCount%Definition
Confirmed Helpful23041.3%The comment led to a code change — engineer explicitly acknowledged it, or the suggested fix was verified in a subsequent commit diff.
Confirmed Not Helpful10118.1%The engineer explicitly dismissed the comment with a reason why the feedback was incorrect, irrelevant, or by design.
Unresolved22640.6%No reply and no definitive diff evidence. All 226 are comments where the engineer did not respond. Includes comments on the final commit before merge, files modified at different lines, and comments with no line number. Could be valid feedback that was never evaluated.
+ + + + + + +
+226 comments (41%) are unresolved — not because the AI was wrong, but because no one engaged. +All 226 are comments that received no reply from the engineer. This includes 122 comments on the final commit before merge (where the engineer had no subsequent commits), 45 where the file was modified but at different lines, 50 where the comment had no line number for verification, and 9 where the file was never modified. If engineers had responded — even just to dismiss — we would know whether the feedback was useful. +
+ +

How Helpful Comments Were Delivered

+ + + + + + + + + +
PathCountDescription
Engineer replied and acknowledged144"good catch", "fixed", "addressed", "added unit test", "@copilot apply changes"
Engineer silently applied the fix86No reply, but suggestion code or exact line range verified as modified in subsequent commit
Total Confirmed Helpful230
+ +

How Not Helpful Comments Were Identified

+ + + + + + + + + +
PathCountDescription
Engineer replied and dismissed95"won't fix", "this is fine", "not applicable", "Copilot is incorrect"
Comment on stale/outdated code6Comment was on code already changed in a different commit
Total Confirmed Not Helpful101
+ + +

 

Results by Repository
+ + +

Broker (293 comments)

+ + + + + + +
49%16%35%
+ + +

Common (188 comments)

+ + + + + + +
36%18%46%
+ + +

MSAL (76 comments)

+ + + + + + +
26%25%49%
+ + + + + + + + + + +
 
Helpful
 
Not Helpful
 
Unresolved
+ + + + + + + + + + + +
RepositoryCommentsResponse RateHelpfulNot HelpfulUnresolved
Broker29356.0%142 (48.5%)48 (16.4%)103 (35.2%)
Common18829.8%68 (36.2%)34 (18.1%)86 (45.7%)
MSAL7625.0%20 (26.3%)19 (25.0%)37 (48.7%)
+ + +

 

Results by Engineer
+

Each engineer has two GitHub accounts (personal + EMU). These have been merged. Names are anonymized.

+ + + + + + + + + + + + + + + + + + + + + + +
EngineerCommentsRepliedResponse RateHelpfulNot HelpfulUnresolvedHelpfulness
Engineer A2020100%146070.0%
Engineer B837590.4%5726068.7%
Engineer C15426.7%82553.3%
Engineer D403075.0%1914747.5%
Engineer E1103733.6%44184840.0%
Engineer F992020.2%36115236.4%
Engineer G632234.9%20123131.7%
Engineer H1002424.0%27126127.0%
Engineer I24625.0%501920.8%
Engineer J3133.3%0030.0%
+ + + + + +
+Engagement drives value — and visibility. +Engineer A (100% response rate) and Engineer B (90%) have the highest helpfulness and zero unresolved comments. When you engage, you know exactly what the AI got right and wrong. Engineer F (20%) and Engineer H (24%) have 52 and 61 unresolved comments respectively — over half their feedback goes into a black hole. +
+ + +

 

What Copilot Is Good At
+ +

Catching real bugs:

+
+PR #3050 (Common): Copilot flagged that "$it" string wrapping doesn't JSON-escape the content, which could break consumers.
+Engineer reply: "You're right. Making the change." +
+ +

Stale documentation and naming inconsistencies:

+
+PR #64 (Broker): Copilot identified four locations where KDoc still referenced the old flight constant after it was renamed. All four were silently fixed in the next commit. +
+ +

CI/pipeline configuration issues:

+
+PR #3038 (Common): Copilot warned that using vmImage: 'windows-latest' makes the CD pipeline non-deterministic. The engineer changed to a pinned image version. +
+ +

The @copilot apply workflow:

+
+In 16 instances (2.9%), engineers validated the feedback and delegated the fix back: @copilot open a new pull request to apply changes based on [this feedback]. An efficient pattern in which the AI both identifies the issue and fixes it. +
+ + +

 

What Copilot Struggles With
+ +

Lacking domain context:

+
+Copilot: "shared_device_id could be used for tracking — consider hashing before emission."
+Engineer: "The shared_device_id is a random UUID and is not PII." +
+ +

Suggesting tests for trivial code:

+
+Copilot: "New telemetry attributes lack test coverage..."
+Engineer: "These are just telemetry related changes and adding unit tests will be overdo here." +
+ +

Misunderstanding APIs:

+
+Copilot: "00000003-0000-0ff1-ce00-000000000000 is the resource ID for Microsoft Graph, not SharePoint Online."
+Engineer: "00000003-0000-0ff1-ce00-000000000000 is SharePoint Online." +
+ + +

 

Key Takeaways
+ + + + + + + + + + +
#Finding
157% of Copilot review comments receive no response from engineers. The majority of AI review feedback is never acknowledged. Of the ignored comments, only 27% are silently addressed; 71% are unresolved, and the remaining 2% were comments on stale code.
241% of all comments led to a confirmed code improvement. But 41% are unresolved, meaning the true helpfulness rate lies between 41% (floor) and 82% (ceiling). We cannot narrow this range without engineer engagement.
3When engineers engage, 60% of comments are helpful. This suggests the AI review quality itself is decent — the bottleneck is adoption, not accuracy.
4Only 18% of comments are confirmed not helpful. When we restrict "not helpful" to comments with actual evidence of poor quality, the rate is surprisingly low.
5Engagement is the strongest predictor of value. Engineers who reply to 75%+ of comments see 47-70% helpfulness with few or no unresolved comments. Engineers who reply to under 35% typically see 20-40% helpfulness with large unresolved buckets.
638% of ignored comments are on the final commit before merge. These 122 comments represent the last review round being skipped — the feedback had zero chance of impact regardless of its quality.
7Broker gets the most value (49%), Common is middling (36%), MSAL is lowest (26%). This correlates with response rate: Broker 56%, Common 30%, MSAL 25%.
+ + +

 

How We Measured This
+ + + + + + + + +
PhaseMethod
1. Data collectionGitHub API extraction of all 557 Copilot inline review comments from 163 human-authored PRs. Excluded PRs by Copilot coding agent.
2. Reply classificationAutomated keyword matching on 239 replied comments to classify as positive, negative, or ambiguous.
3. Diff verificationFor 318 unreplied comments, used GitHub compare API to check if suggestion tokens appeared as diff additions, or if exact line ranges were modified.
4. AI-assisted classificationAll 133 ambiguous replies were read and classified by GitHub Copilot, based on reply text and domain context. These classifications were reviewed by the report author but not independently verified by the original PR engineers.
5. Cross-validationAll initially "not helpful" classifications were re-examined against diff evidence. 18 were reclassified where evidence was strong.
+ +

Classification rules: "Confirmed Helpful" requires positive evidence (engineer acknowledgment or verified code change). "Confirmed Not Helpful" requires positive evidence of poor quality (explicit engineer dismissal). Comments where the engineer did not engage are classified as "Unresolved" rather than assumed unhelpful.

+ + +

 

What We'd Recommend
+ +

Based on this analysis, we see three opportunities:

+ +

1. Engage with the feedback. The single biggest improvement would be for engineers to respond to Copilot's review comments — even if just to dismiss them. When engineers engage, 60% of comments are helpful. When they don't, 71% of those comments become a black hole of unknown value. A quick "not applicable" is more useful than silence — it tells us whether the AI is giving good feedback, and it builds the data we need to improve the review instructions.

+ +

2. Review before merging. 38% of ignored comments were on the final commit — the engineer merged without looking at Copilot's last round. Building a habit of checking review comments before clicking merge would give this feedback a chance to have impact.

+ +

3. Improve the review instructions. The most common dismissal reasons — "lacks domain context," "suggests tests for trivial code," "misunderstands our APIs" — are things we can address by refining our copilot-instructions.md. Adding guidance like "don't suggest tests for telemetry-only changes" or providing context about specific domain patterns could cut the noise and improve the signal-to-noise ratio for everyone.

+ + + + +
+

Analysis conducted March 23–24, 2026. Data covers all PRs created January 23 – March 23, 2026 in the Common, MSAL, and Broker repositories.

+

Raw data (557 comments with full text, replies, diff evidence, and verdicts) available for independent verification.

+
+ +
+ + + + + + + + + + + + + + + + + + diff --git a/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.html b/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.html new file mode 100644 index 00000000..72759b00 --- /dev/null +++ b/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.html @@ -0,0 +1,411 @@ + + + + + +Copilot Code Review Effectiveness Analysis + + + + +

Copilot Code Review Effectiveness Analysis

+

Android Auth Platform  |  January 23 – March 23, 2026  |  Common, MSAL, Broker Repositories

+ + +

Executive Summary

+ +

We analyzed every inline code review comment left by GitHub Copilot on human-authored pull requests across our three Android Auth repositories over the past two months. For each of the 557 comments, we determined whether the feedback led to a concrete code improvement.

+ +
+
+
57%
+
of comments received
no response
+
+
+
41%
+
confirmed
helpful
+
+
+
18%
+
confirmed
not helpful
+
+
+
41%
+
unresolved
(not evaluated)
+
+
+ +
+ The biggest finding isn't about AI quality — it's about adoption. + When engineers do engage with Copilot's comments, 60% turn out to be helpful. But 57% of comments receive no response at all, and 41% of all comments are unresolved — we can't tell if they were useful because no one evaluated them. The true helpfulness rate lies between 41% (confirmed floor) and 82% (if all unresolved were helpful). +
+ + +

Scope

+ +
+
+
163
+
Human PRs
scanned
+
+
+
113
+
PRs received
Copilot review (69%)
+
+
+
557
+
Inline review
comments
+
+
+
4.9
+
Avg comments
per reviewed PR
+
+
+ + +

Overall Results

+ +

Engineer Response Rate

+

Before looking at helpfulness, it's important to understand how engineers interact with Copilot reviews — because a comment can only demonstrate value if someone reads it.

+ +
+
43% replied
+
57% ignored
+
+
+ Engineer replied (239) + No response (318) +
+ +

More than half of all Copilot review comments receive no human response. The majority of AI feedback enters a void — it may be valid, but we can never confirm its value if no one engages with it.

+ +

Helpfulness Verdict

+ +
+
41.3%
+
18.1%
+
40.6%
+
+
+ Confirmed Helpful (230) + Confirmed Not Helpful (101) + Unresolved (226) +
+ + + + + + +
VerdictCount%Definition
Confirmed Helpful23041.3%The comment led to a code change — engineer explicitly acknowledged it, or the suggested fix was verified in a subsequent commit diff.
Confirmed Not Helpful10118.1%The engineer explicitly dismissed the comment with a reason why the feedback was incorrect, irrelevant, or by design.
Unresolved22640.6%No reply and no definitive diff evidence. Includes comments on the final commit before merge, files modified at different lines, and comments with no line number. Could be valid feedback that was never evaluated.
+ +
+ 226 comments (41%) are unresolved — not because the AI was wrong, but because no one looked. + This includes 122 comments on the final commit before merge where the engineer had no subsequent commits, 45 where the file was modified at different lines, 50 with no line number for verification, and 9 where the file was never modified. If engineers had engaged — even just to dismiss — we would know. +
+ +

How Helpful Comments Were Delivered

+ + + + + +
PathCountDescription
Engineer replied and acknowledged144"good catch", "fixed", "addressed", "added unit test", "@copilot apply changes"
Engineer silently applied the fix86No reply, but suggestion code or exact line range verified as modified in subsequent commit
Total Confirmed Helpful230
+ +

How Not Helpful Comments Were Identified

+ + + + + +
PathCountDescription
Engineer replied and dismissed95"won't fix", "this is fine", "not applicable", "Copilot is incorrect"
Comment on stale/outdated code6Comment was on code already changed in a different commit
Total Confirmed Not Helpful101
+ + +

Results by Repository

+ +
+
+
Broker
+
+
48.5%
+
16%
+
35%
+
+
293
+
+
+
Common
+
+
36.2%
+
18%
+
46%
+
+
188
+
+
+
MSAL
+
+
26%
+
25%
+
49%
+
+
76
+
+
+
+ Helpful + Not Helpful + Unresolved + (number = total comments) +
+ + + + + + +
RepositoryCommentsResponse RateHelpfulNot HelpfulUnresolved
Broker29356.0%142 (48.5%)48 (16.4%)103 (35.2%)
Common18829.8%68 (36.2%)34 (18.1%)86 (45.7%)
MSAL7625.0%20 (26.3%)19 (25.0%)37 (48.7%)
+ +

Broker has the highest response rate (56%) and the highest confirmed helpfulness (49%). In Common and MSAL — where response rates are below 30% — nearly half of all comments are unresolved.

+ + +

Results by Engineer

+

Each engineer has two GitHub accounts (personal + EMU). These have been merged. Names are anonymized.

+ + + + + + + + + + + + + +
EngineerCommentsRepliedResponse RateHelpfulNot HelpfulUnresolvedHelpfulness
Engineer A2020100%146070.0%
Engineer B837590.4%5726068.7%
Engineer C15426.7%82553.3%
Engineer D403075.0%1914747.5%
Engineer E1103733.6%44184840.0%
Engineer F992020.2%36115236.4%
Engineer G632234.9%20123131.7%
Engineer H1002424.0%27126127.0%
Engineer I24625.0%501920.8%
Engineer J3133.3%0030.0%
+ +
+ Engagement drives value — and visibility. + Engineer A (100% response rate) and Engineer B (90%) have the highest helpfulness and zero unresolved comments. When you engage, you know exactly what the AI got right and wrong. Engineer F (20%) and Engineer H (24%) have 52 and 61 unresolved comments respectively — over half their feedback goes into a black hole. +
+ + +

Response Behavior Deep Dive

+ +

What happens when engineers reply? (239 comments)

[Split bar: 60% helpful / 40% not helpful]

When engineers engage, the majority of Copilot feedback turns out to be useful. This is the strongest signal that review quality is decent — the bottleneck is adoption, not accuracy.

+ +

What happens when engineers don't reply? (318 comments)

[Split bar: 27% silently fixed / 71% unresolved]
| What happened | Count | % of ignored | Verdict |
|---------------|-------|--------------|---------|
| Suggestion code silently applied (verified via diff) | 50 | 15.7% | Confirmed Helpful |
| Exact commented lines modified in subsequent commit | 7 | 2.2% | Confirmed Helpful |
| Evidence of fix at nearby lines (re-audit) | 29 | 9.1% | Confirmed Helpful |
| Merged without any subsequent commits | 122 | 38.4% | Unresolved |
| File modified but not at the commented lines | 45 | 14.2% | Unresolved |
| File modified but no line number to verify | 50 | 15.7% | Unresolved |
| File never modified after the comment | 9 | 2.8% | Unresolved |
| Comment on stale/outdated code | 6 | 1.9% | Confirmed Not Helpful |

The single largest category is the 122 comments (38% of ignored) where the engineer merged the PR without pushing any additional commits after Copilot's review. These comments may have been valid, but we cannot tell because the engineer never evaluated them.
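This bucket is detectable mechanically: if the commit a comment was left on is still the PR head at merge time, no commits followed the review. A minimal sketch (SHA values in the test are illustrative, and the real check runs in the skill's PowerShell scripts):

```python
def review_round_skipped(comment_commit_sha: str, pr_head_sha: str) -> bool:
    """True when no commits were pushed after Copilot's review,
    i.e. the comment sits on the final commit before merge."""
    return comment_commit_sha == pr_head_sha
```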

+ + +

What Copilot Is Good At

+ +

Catching real bugs:

+
+

PR #3050 (Common): Copilot flagged that "$it" string wrapping doesn't JSON-escape the content, which could break consumers if contract values contain special characters.

+

Engineer reply: "You're right. Making the change."

+
+ +

Stale documentation and naming inconsistencies:

+
+

PR #64 (Broker): Copilot identified four locations where KDoc still referenced the old flight constant USE_TEE_ONLY_FOR_TOKEN_BINDING after it was renamed. All four were silently fixed in the next commit.

+
+ +

Dead code and unused imports:

+
+

PR #3040 (Common): Copilot spotted an unused local variable enabledSettingRaw. Verified as fixed via diff — the suggested replacement code appeared in the commit additions.

+
+ +

CI/pipeline configuration issues:

+
+

PR #3038 (Common): Copilot warned that vmImage: 'windows-latest' makes the CD pipeline non-deterministic. The engineer changed to a pinned image version.

+
+ +

The @copilot apply workflow:

+
+

In 16 instances (2.9%), engineers validated Copilot's feedback and then delegated the fix back with: @copilot open a new pull request to apply changes based on [this feedback]. This is an efficient pattern in which the AI both identifies and fixes the issue.

+
+ + +

What Copilot Struggles With

+ +

Lacking domain context:

+
+

Copilot: "shared_device_id could be used for tracking across apps — consider hashing before emission."

+

Engineer: "The shared_device_id is a random UUID generated by one of the participating apps and is not PII."

+
+ +

Suggesting tests for trivial code:

+
+

Copilot: "New telemetry attributes lack test coverage..."

+

Engineer: "These are just telemetry related changes and adding unit tests will be overdo here."

+
+ +

Misunderstanding APIs:

+
+

Copilot: "00000003-0000-0ff1-ce00-000000000000 is the resource ID for Microsoft Graph, not SharePoint Online."

+

Engineer: "00000003-0000-0ff1-ce00-000000000000 is SharePoint Online."

+
+ +

Commenting on intentional design choices:

+
+

Copilot: "getPackageInfo() != null is redundant since it either returns PackageInfo or throws..."

+

Engineer: "This is fine. The verbosity makes the code clearer to understand."

+
+ + +

Key Takeaways

| # | Finding |
|---|---------|
| 1 | 57% of Copilot review comments receive no response from engineers. The majority of AI review feedback is never acknowledged. Of the ignored comments, only 27% are silently addressed — the remaining 71% are unresolved. |
| 2 | 41% of all comments led to a confirmed code improvement. But 41% are unresolved, meaning the true helpfulness rate lies between 41% (floor) and 82% (ceiling). We cannot narrow this range without engineer engagement. |
| 3 | When engineers engage, 60% of comments are helpful. This suggests the AI review quality itself is decent — the bottleneck is adoption, not accuracy. |
| 4 | Only 18% of comments are confirmed not helpful. When we restrict "not helpful" to comments with actual evidence of poor quality — explicit engineer dismissals — the rate is surprisingly low. |
| 5 | Engagement is the strongest predictor of value. Engineers who reply to 75%+ of comments see 47-70% helpfulness with zero unresolved. Engineers who reply to <35% see 20-40% helpfulness with massive unresolved buckets. |
| 6 | 38% of ignored comments are on the final commit before merge. These 122 comments represent the last review round being skipped — the feedback had zero chance of impact regardless of its quality. |
| 7 | Broker gets the most value (49% helpful), Common is middling (36%), MSAL is lowest (26%). This correlates with response rate: Broker 56%, Common 30%, MSAL 25%. |
| 8 | At ~5 comments per PR, the signal-to-noise question can't be settled without engagement. We confirmed ~2 useful comments per PR on average, but with 41% unresolved, the actual number could be higher. |

How We Measured This

| Phase | Method |
|-------|--------|
| 1. Data collection | GitHub API extraction of all 557 Copilot inline review comments from 163 human-authored PRs. Excluded PRs by Copilot coding agent. |
| 2. Reply classification | Automated keyword matching on 239 replied comments to classify as positive, negative, or ambiguous. |
| 3. Diff verification | For 318 unreplied comments, used the GitHub compare API to check if suggestion tokens appeared as diff additions, or if the exact line range was modified in subsequent commits. |
| 4. AI-assisted classification | All 133 ambiguous replies were individually read and classified by the AI conducting this analysis (Claude), based on reply text and domain context. These classifications were reviewed by the report author but not independently verified by the original PR engineers. |
| 5. Cross-validation | All initially "not helpful" classifications were re-examined against diff evidence. 18 were reclassified where evidence was strong (e.g., typo fix + matching -1 line file change). |

Classification rules: A comment is "Confirmed Helpful" only with positive evidence (engineer acknowledgment or verified code change). "Confirmed Not Helpful" only with positive evidence of poor quality (explicit engineer dismissal). Comments where the engineer did not engage — including the final review round merged without evaluation — are classified as "Unresolved" rather than assumed unhelpful.
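Under these rules, confirmed-helpful comments set a floor on the true helpfulness rate, and unresolved comments set how far above that floor it could reach. A small sketch reproducing the report's 41%/82% bounds:

```python
def helpfulness_bounds(helpful: int, not_helpful: int, unresolved: int):
    """Return (floor, ceiling) helpfulness rates as fractions.

    Floor assumes every unresolved comment was unhelpful;
    ceiling assumes every unresolved comment was helpful.
    """
    total = helpful + not_helpful + unresolved
    floor = helpful / total
    ceiling = (helpful + unresolved) / total
    return floor, ceiling

# Report totals: 230 helpful, 101 not helpful, 226 unresolved (557 comments)
floor, ceiling = helpfulness_bounds(230, 101, 226)
```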

+ +
+ + + + diff --git a/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.md b/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.md new file mode 100644 index 00000000..e0764b36 --- /dev/null +++ b/.github/skills/copilot-review-analyst/assets/Copilot-Code-Review-Effectiveness-Report.md @@ -0,0 +1,270 @@ +# Copilot Code Review Effectiveness Analysis + +**Android Auth Platform | January 23 – March 23, 2026** + +--- + +## Executive Summary + +We analyzed **every inline code review comment** left by GitHub Copilot on human-authored pull requests across our three Android Auth repositories (Common, MSAL, Broker) over the past two months. For each of the **557 comments**, we determined whether the feedback led to a concrete code improvement — either through an explicit engineer response, or by verifying that the suggested change appeared in subsequent commit diffs. + +**Key findings:** + +- **57% of Copilot's comments received no response from engineers.** This is the single most important number in this report. The majority of AI review feedback is never even acknowledged. +- **Of comments that engineers engaged with, 60% were helpful** — a strong signal that the review quality itself is decent. +- **41% of all comments led to a confirmed code improvement.** But 41% are unresolved — comments that were ignored and where we lack evidence to judge either way. The true helpfulness rate could be significantly higher, but we can't confirm it because no one looked. +- **Engineers who reply to 75%+ of comments see 47-70% helpfulness.** Engineers who reply to <35% see 20-40%. The biggest lever for improving Copilot review value is not the AI — it's engineer engagement with the feedback. + +--- + +## How We Measured This + +This analysis went through five phases to ensure accuracy: + +1. 
**Data collection.** We used the GitHub API to extract all 557 Copilot inline review comments from 163 human-authored PRs (excluding PRs authored by Copilot coding agent). We also recorded which comments received human replies and what those replies said. + +2. **Reply-based classification.** For the 239 comments (43%) that received a human reply, we classified the reply as positive (e.g., "good catch", "fixed", "addressed"), negative (e.g., "won't fix", "not applicable", "by design"), or ambiguous. + +3. **Diff-level verification.** For the 318 comments (57%) that received no reply, we checked whether the engineer acted on the feedback silently. We used the GitHub compare API to examine the diff between the commit Copilot reviewed and the final PR head. For comments containing GitHub suggestion blocks, we checked if the suggested code tokens appeared as additions in the diff. For prose comments, we checked if the exact line range was modified in a subsequent commit. + +4. **AI-assisted reply classification.** All 133 comments with ambiguous replies were individually read and classified by the AI conducting this analysis (Claude), based on the reply text and domain context. For example, "this is just telemetry" was classified as dismissed, "@copilot apply changes" was classified as helpful (engineer delegated the fix back to Copilot), and "Added unit test for this" was classified as helpful (engineer acted on the suggestion). These classifications were reviewed for accuracy by the report author but were not independently verified by the original PR engineers. + +5. **Cross-validation.** All comments initially classified as "not helpful" were re-examined against the diff evidence. 18 were reclassified as helpful where the evidence was strong (e.g., a typo fix suggestion with a corresponding -1 line file change, or an unused import suggestion with a matching -2 line change). 
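Phase 2 is straightforward keyword matching. The skill implements it in its PowerShell scripts; a minimal Python sketch, using a small illustrative subset of the patterns, shows the three-way split:

```python
# Illustrative subsets only; the full pattern lists are in
# references/classification-rules.md.
POSITIVE = ["good catch", "fixed", "addressed", "you're right", "will fix"]
NEGATIVE = ["won't fix", "not applicable", "by design", "false positive"]

def classify_reply(reply: str) -> str:
    """Classify a human reply as positive, negative, or ambiguous."""
    text = reply.lower()
    pos = any(p in text for p in POSITIVE)
    neg = any(n in text for n in NEGATIVE)
    if pos and not neg:
        return "positive"
    if neg and not pos:
        return "negative"
    return "ambiguous"  # mixed or no match: escalate to phase 4
```

Anything that comes back "ambiguous" flows into the AI-assisted classification of phase 4.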
+ +The final dataset classifies each of the 557 comments as **confirmed helpful**, **confirmed not helpful**, or **unresolved** (insufficient evidence to determine). No comment is left without a classification. + +--- + +## Overall Results + +| Metric | Value | +|--------|-------| +| Human PRs scanned | 163 | +| PRs that received Copilot review | 113 (69%) | +| Total inline review comments | 557 | +| PR-level summary comments | 205 | +| Average comments per reviewed PR | 4.9 | + +### Engineer Response Rate + +Before looking at helpfulness, it's important to understand how engineers interact with Copilot reviews — because a comment can only demonstrate value if someone reads it. + +| Behavior | Count | Percentage | +|----------|-------|------------| +| **Engineer replied** (any response — acceptance, dismissal, or discussion) | 239 | **42.9%** | +| **Engineer did not reply** | 318 | **57.1%** | + +More than half of all Copilot review comments receive no human response at all. This means the majority of AI feedback enters a void — it may be valid, but we can never confirm its value if no one engages with it. + +### Helpfulness Verdict + +Each comment was classified into one of three categories: + +| Verdict | Count | Percentage | Definition | +|---------|-------|------------|------------| +| **Confirmed Helpful** | **230** | **41.3%** | The comment led to a code change — either the engineer explicitly acknowledged it, or the suggested fix was verified in a subsequent commit diff. | +| **Confirmed Not Helpful** | **101** | **18.1%** | The engineer explicitly dismissed the comment with a reason why the feedback was incorrect, irrelevant, or by design. We have positive evidence that the comment was *wrong*, not merely that it was *ignored*. | +| **Unresolved** | **226** | **40.6%** | The comment received no reply AND we could not confirm whether it was addressed. 
This includes cases where the engineer merged without any subsequent commits (the final review round was never evaluated), where the file was modified but not at the specific lines Copilot flagged, or where the comment had no line number making verification impossible. | + +The **unresolved** category is the most important number in this report. These 226 comments — **41% of all feedback** — are not confirmed failures of the AI. They are comments where we simply lack evidence either way because no one engaged with them. Many may be perfectly valid feedback that was never read. If engineers had engaged with them (even just to dismiss), we would know. The true helpfulness rate likely falls somewhere between 41% (confirmed floor) and 82% (if all unresolved were helpful). + +### How Each Category Breaks Down + +**Confirmed Helpful (230):** + +| Path | Count | Description | +|------|-------|-------------| +| Engineer replied and acknowledged | 144 | Engineer explicitly confirmed the feedback was useful (e.g., "good catch", "fixed", "addressed", "added unit test") | +| Engineer silently applied the fix | 86 | No reply, but the suggestion code or exact line range was verified as modified in a subsequent commit | + +**Confirmed Not Helpful (101):** + +| Path | Count | Description | +|------|-------|-------------| +| Engineer replied and dismissed | 95 | Engineer explicitly explained why the comment was wrong, irrelevant, or by design (e.g., "won't fix", "this is fine", "Copilot is incorrect", "false positive") | +| Comment on code already changed | 6 | Comment was on stale/outdated code that had already been modified in a different commit | + +**Unresolved (226):** + +| Path | Count | Description | +|------|-------|-------------| +| Merged without any subsequent commits | 122 | No commits after Copilot's review — the engineer merged the PR without acting on the final review round. The comment may have been valid, but we cannot tell because the engineer never evaluated it. 
| +| File modified but not at the commented lines | 45 | The file was changed after the review, but the diff shows the changes were at different lines than what Copilot flagged. Possibly addressed via a different approach, or possibly coincidental. | +| File modified but no line number to verify | 50 | Copilot's comment had no line number metadata, and the file was modified. We cannot confirm whether the specific concern was addressed. | +| File never modified after the comment | 9 | The file was not touched in any commit after the review, and no reply was left. The comment may have been valid but was ignored. | + +--- + +## Results by Repository + +| Repository | Comments | Response Rate | Confirmed Helpful | Confirmed Not Helpful | Unresolved | +|------------|----------|---------------|------|------|------| +| **Broker** | 293 | **56.0%** | 142 (48.5%) | 48 (16.4%) | 103 (35.2%) | +| **Common** | 188 | **29.8%** | 68 (36.2%) | 34 (18.1%) | 86 (45.7%) | +| **MSAL** | 76 | **25.0%** | 20 (26.3%) | 19 (25.0%) | 37 (48.7%) | + +Broker has the highest response rate (56%) and correspondingly the highest confirmed helpfulness (49%). But even in Broker, 35% of comments are unresolved. In Common and MSAL — where response rates are below 30% — nearly half of all comments are unresolved, meaning we cannot determine whether the AI's feedback was useful because engineers didn't evaluate it. + +Coverage across the three repos: + +| Repository | Human PRs | PRs Reviewed by Copilot | Coverage | +|------------|-----------|------------------------|----------| +| Common | 68 | 47 | 69% | +| MSAL | 31 | 19 | 61% | +| Broker | 64 | 47 | 73% | + +--- + +## Results by Engineer + +Each engineer has two GitHub accounts (a personal account for public repos and an EMU account for the private broker repo). These have been merged. Names are anonymized. 
+ +| Engineer | Comments | Replied | Ignored | Response Rate | Confirmed Helpful | Confirmed Not Helpful | Unresolved | Helpfulness | +|----------|----------|---------|---------|---------------|------|------|------|-------------| +| **Engineer A** | 20 | 20 | 0 | **100%** | 14 | 6 | 0 | **70.0%** | +| **Engineer B** | 83 | 75 | 8 | **90.4%** | 57 | 26 | 0 | **68.7%** | +| **Engineer C** | 15 | 4 | 11 | **26.7%** | 8 | 2 | 5 | **53.3%** | +| **Engineer D** | 40 | 30 | 10 | **75.0%** | 19 | 14 | 7 | **47.5%** | +| **Engineer E** | 110 | 37 | 73 | **33.6%** | 44 | 18 | 48 | **40.0%** | +| **Engineer F** | 99 | 20 | 79 | **20.2%** | 36 | 11 | 52 | **36.4%** | +| **Engineer G** | 63 | 22 | 41 | **34.9%** | 20 | 12 | 31 | **31.7%** | +| **Engineer H** | 100 | 24 | 76 | **24.0%** | 27 | 12 | 61 | **27.0%** | +| **Engineer I** | 24 | 6 | 18 | **25.0%** | 5 | 0 | 19 | **20.8%** | +| **Engineer J** | 3 | 1 | 2 | **33.3%** | 0 | 0 | 3 | **0%** | + +*Helpfulness = Confirmed Helpful / Total Comments. Response Rate = Replied / Total Comments. Unresolved = comments with no reply and no definitive diff evidence either way.* + +**Key observation:** There is a strong correlation between response rate and helpfulness. Engineer A (100% response rate) and Engineer B (90%) have the highest helpfulness (70% and 69%) — and crucially, **zero unresolved comments**. When engineers engage, we know exactly what's helpful and what's not. Engineers with low response rates (Engineer F at 20%, Engineer H at 24%) have massive unresolved buckets (52 and 61 comments respectively) — over half their comments go into a black hole where we can't tell if the AI was right or wrong. + +--- + +## Response Behavior Deep Dive + +Of the 557 total comments: + +- **239 (42.9%) received a reply.** Of those, **60.3% were helpful** and 39.7% were not helpful. When engineers engage, the majority of Copilot feedback turns out to be useful. 
+- **318 (57.1%) were ignored.** Of those, **27.0% were silently addressed** (verified via diff), **1.9% were on stale code** (confirmed not helpful), and the remaining **71.1% are unresolved** — we cannot determine whether the comment was useful because the engineer never evaluated it. + +### What happens to ignored comments + +| What happened | Count | % of ignored | Verdict | +|---------------|-------|-------------|---------| +| Suggestion code silently applied (verified via diff) | 50 | 15.7% | Confirmed Helpful | +| Exact commented lines modified in subsequent commit | 7 | 2.2% | Confirmed Helpful | +| Re-audit: evidence of fix at nearby lines | 29 | 9.1% | Confirmed Helpful | +| Merged without any subsequent commits | 122 | 38.4% | **Unresolved** | +| File never modified after the comment | 9 | 2.8% | **Unresolved** | +| Comment on stale/outdated code | 6 | 1.9% | Confirmed Not Helpful | +| File modified but not at the commented lines | 45 | 14.2% | **Unresolved** | +| File modified but no line number to verify | 50 | 15.7% | **Unresolved** | + +The single largest category is the **122 comments (38.4% of ignored)** where the engineer merged the PR without pushing any additional commits after Copilot's review. These represent the final review round being skipped entirely — the feedback had zero chance of impact regardless of its quality. + +--- + +## What Copilot Is Good At + +The most common categories of helpful comments, with real examples from our PRs: + +**Catching real bugs:** +> *PR #3050 (Common):* Copilot flagged that `"$it"` string wrapping doesn't JSON-escape the content, which could break consumers if contract values contain special characters. +> *Engineer reply: "You're right. 
Making the change."* + +**Stale documentation and naming inconsistencies:** +> *PR #64 (Broker):* Copilot identified four locations where KDoc still referenced the old flight constant `USE_TEE_ONLY_FOR_TOKEN_BINDING` after it was renamed to `USE_TEE_ONLY_FOR_HARDWARE_BOUND_KEYS`. All four were silently fixed in the next commit. + +**Dead code and unused imports:** +> *PR #3040 (Common):* Copilot spotted an unused local variable `enabledSettingRaw` that was assigned but never read. Verified as fixed via diff analysis — the suggested replacement code appeared in the commit additions. + +**CI/pipeline configuration issues:** +> *PR #3038 (Common):* Copilot warned that using `vmImage: 'windows-latest'` makes the CD pipeline non-deterministic. The engineer changed to a pinned image version. + +**The `@copilot apply` workflow:** +> In 16 instances (2.9%), engineers validated Copilot's feedback and then delegated the fix back to Copilot with: `@copilot open a new pull request to apply changes based on [this feedback]`. This is an efficient pattern where AI identifies and fixes the issue end-to-end. + +--- + +## What Copilot Struggles With + +The most common categories of unhelpful comments, with real examples: + +**Lacking domain context:** +> *Copilot:* "`shared_device_id` could be used for tracking across apps — consider hashing before emission." +> *Engineer:* "The shared_device_id is a random UUID generated by one of the participating apps and is not PII." +> +> Copilot applied a general security heuristic without understanding that this particular identifier is already random and non-linkable. + +**Suggesting tests for trivial code:** +> *Copilot:* "New telemetry attributes lack test coverage..." +> *Engineer:* "These are just telemetry related changes and adding unit tests will be overdo here." +> +> This was a recurring theme — Copilot frequently requests tests for logging/telemetry code that the team considers low-risk. 
+ +**Misunderstanding APIs:** +> *Copilot:* "`00000003-0000-0ff1-ce00-000000000000` is the resource ID for Microsoft Graph, not SharePoint Online." +> *Engineer:* "00000003-0000-0ff1-ce00-000000000000 is SharePoint Online." +> +> Copilot was factually wrong about a well-known Microsoft service resource ID. + +**Commenting on intentional design choices:** +> *Copilot:* "`getPackageInfo() != null` is redundant since it either returns PackageInfo or throws..." +> *Engineer:* "This is fine. The verbosity makes the code clearer to understand." + +**Over-engineering suggestions:** +> *Copilot:* "Use a bounded min-heap of size `PRT_ARTIFACT_LIMIT` to reduce overhead..." +> *Engineer:* (The list can only ever contain 3 items — there are only 3 broker apps on Android.) + +--- + +## Most Reviewed Files + +| Rank | File | Comments | +|------|------|----------| +| 1 | `.github/workflows/copilot-issue-response.yml` | 27 | +| 2 | `broker4j/.../AttributeName.java` | 19 | +| 3 | `.github/workflows/copilot-ci-feedback.md` | 16 | +| 4 | `common/.../AuthorizationFragment.java` | 15 | +| 5 | `broker4j/.../MultipleWorkplaceJoinDataStore.java` | 11 | +| 6 | `broker4j/.../AbstractBrokerController.java` | 11 | +| 7 | `common/.../AzureActiveDirectoryWebViewClient.java` | 11 | +| 8 | `AADAuthenticator/.../BrowserSsoProvider.kt` | 10 | +| 9 | `broker4j/.../BrokerFlight.java` | 9 | +| 10 | `broker4j/.../DeviceRegistrationRequestHandler.java` | 9 | + +CI/workflow files and large Java classes with many change touchpoints attract the highest volume of comments. `AttributeName.java` appears frequently because many PRs add telemetry attributes. + +--- + +## Key Takeaways + +1. **57% of Copilot review comments receive no response from engineers.** This is the most significant finding. The majority of AI review feedback is never acknowledged. Some of it is silently acted on (27% of ignored comments show diff evidence of a fix), but the vast majority — 71% of ignored comments — are unresolved. 
We simply don't know if they were useful because no one evaluated them. + +2. **41% of all comments led to a confirmed code improvement.** But 41% are unresolved, meaning the true helpfulness rate lies between 41% (floor) and 82% (ceiling). We cannot narrow this range without engineer engagement. + +3. **When engineers engage, 60% of comments are helpful.** Of the 239 comments that received a reply, 144 (60%) led to acknowledged improvements. This suggests the AI review quality itself is decent — the bottleneck is adoption, not accuracy. + +4. **Only 18% of comments are confirmed not helpful.** When we restrict "not helpful" to comments where the engineer explicitly dismissed the feedback — the only cases where we have positive evidence of low quality — the rate is surprisingly low. + +5. **Engagement is the strongest predictor of value.** Engineers who reply to 75%+ of comments see 47-70% confirmed helpfulness with zero unresolved comments. Engineers who reply to <35% see 20-40% helpfulness with massive unresolved buckets (50-60% of their comments). The tool works better when engineers work with it. + +6. **38% of ignored comments are on the final commit before merge.** These 122 comments represent the last review round being skipped entirely — the engineer merged without pushing any further changes. These comments may have been perfectly valid, but the feedback had zero chance of impact. + +7. **Broker gets the most value (49%), Common is middling (36%), MSAL is lowest (26%).** This correlates with response rate: Broker engineers reply to 56% of comments, while Common (30%) and MSAL (25%) reply far less. + +8. **At ~5 comments per PR, the signal-to-noise conversation can't be settled without engagement.** We confirmed ~2 useful comments per PR on average, but with 41% of comments unresolved, the actual number could be higher. The only way to know is for engineers to evaluate the feedback. 
+ +--- + +## Methodology Notes + +- **Scope.** Only inline code review comments from the `Copilot` and `copilot-pull-request-reviewer[bot]` users were counted. PR-level summary comments (205 total) were excluded from the helpfulness analysis. +- **Bot exclusions.** PRs authored by `copilot-swe-agent` (Copilot coding agent) were excluded. Only PRs authored by human engineers were analyzed. +- **Diff verification.** For suggestion blocks, we extracted the suggested code tokens and checked if they appeared as `+` (addition) lines in the compare diff between the comment's commit and the PR head. For prose comments, we checked if the diff hunk line ranges overlapped with the comment's target line range (±5 line tolerance). This is a conservative approach — some fixes that refactored code differently than suggested may be missed. +- **AI-assisted reply classification.** Every comment that could not be definitively classified by automated methods was individually read and classified by the AI conducting this analysis (Claude), based on the reply text and domain context. These AI classifications were reviewed for accuracy by the report author but were not independently verified by the original PR engineers. +- **Conservative approach.** We only classify a comment as "Confirmed Helpful" when there is positive evidence: an explicit engineer acknowledgment, or verified code changes at the exact lines/tokens suggested. We only classify as "Confirmed Not Helpful" when there is positive evidence that the comment was *wrong or irrelevant*: an explicit engineer dismissal with a stated reason. Comments where the engineer simply did not engage — including the final review round that was merged without evaluation, files that were never modified, and files modified at different lines — are classified as "Unresolved" rather than assumed to be unhelpful. 
+- **Account merging.** Engineers with separate public GitHub accounts and EMU (Enterprise Managed User) accounts were merged based on known identity mappings. +- **Data availability.** Raw data for all 557 comments (including comment text, engineer replies, diff verification evidence, and final verdicts) is stored at `%TEMP%\copilot-review-analysis\` for independent verification. + +--- + +*Analysis conducted March 23-24, 2026. Data covers all PRs created January 23 – March 23, 2026 in the Common, MSAL, and Broker repositories.* diff --git a/.github/skills/copilot-review-analyst/references/account-map.json b/.github/skills/copilot-review-analyst/references/account-map.json new file mode 100644 index 00000000..c0d87b27 --- /dev/null +++ b/.github/skills/copilot-review-analyst/references/account-map.json @@ -0,0 +1,22 @@ +{ + "shahzaibj": "Shahzaib", + "shjameel_microsoft": "Shahzaib", + "Prvnkmr337": "Praveen", + "prsaminathan_microsoft": "Praveen", + "siddhijain": "Siddhi", + "siddhijain_microsoft": "Siddhi", + "mohitc1": "Mohit", + "mchand_microsoft": "Mohit", + "rpdome": "Dome", + "rapong_microsoft": "Dome", + "p3dr0rv": "Pedro", + "pedroro_microsoft": "Pedro", + "melissaahn": "Melissa", + "melissaahn_microsoft": "Melissa", + "fadidurah": "Fadi", + "fadidurah_microsoft": "Fadi", + "somalaya": "Sowmya", + "somalaya_microsoft": "Sowmya", + "wzhipan": "Zhipan", + "zhipanwang_microsoft": "Zhipan" +} diff --git a/.github/skills/copilot-review-analyst/references/classification-rules.md b/.github/skills/copilot-review-analyst/references/classification-rules.md new file mode 100644 index 00000000..866700df --- /dev/null +++ b/.github/skills/copilot-review-analyst/references/classification-rules.md @@ -0,0 +1,133 @@ +# Classification Rules + +Complete classification hierarchy for Copilot review comment analysis. + +## Classification Cascade (Priority Order) + +Apply rules in this exact order. First match wins. 
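A first-match-wins cascade is naturally expressed as an ordered list of (predicate, verdict) rules. A minimal Python sketch (the predicates and the comment fields here are placeholders for illustration, not the full rule set; the skill's actual cascade is implemented in PowerShell):

```python
def classify(comment: dict, rules) -> str:
    """Apply (predicate, verdict) rules in order; first match wins."""
    for predicate, verdict in rules:
        if predicate(comment):
            return verdict
    return "unresolved"

# Hypothetical rule order mirroring the cascade's priority idea
rules = [
    (lambda c: c.get("reply_positive"), "helpful"),
    (lambda c: c.get("reply_negative"), "not-helpful"),
    (lambda c: c.get("suggestion_applied"), "helpful"),
]
```

Because earlier rules shadow later ones, rule order encodes the priority column of the tables below.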
+ +### Replied Comments (has human reply) + +| Priority | Condition | Verdict | +|----------|-----------|---------| +| 1 | Reply matches positive keyword patterns | **Helpful** | +| 2 | Reply matches negative keyword patterns | **Not Helpful** | +| 3 | Both positive and negative matched (mixed) | Verdict from `mixedResponseVerdict` in manual audit file (default: **Not Helpful**) | +| 4 | Reply contains `@copilot` (delegated fix) | **Helpful** | +| 5 | Reply matches acknowledged-action patterns | **Helpful** | +| 6 | Reply matches explained-away patterns | **Not Helpful** | +| 7 | Reply matches outdated/dismissed patterns | **Not Helpful** | +| 8 | Genuinely unclear — apply AI judgment | See AI Classification below | + +### No-Response Comments (no human reply) + +| Priority | Condition | Verdict | +|----------|-----------|---------| +| 9 | `suggestion-applied` or `suggestion-likely-applied` (from diff verification) | **Helpful** | +| 10 | `exact-lines-modified` (from diff verification) | **Helpful** | +| 11 | `lines-modified-different-fix` | **Helpful** (nearby lines modified with a different approach — engineer addressed the concern) | +| 12 | `file-changed-elsewhere` or `file-changed-no-line-info` | **Check re-audit list** — helpful if evidence found, else **Unresolved** | +| 13 | `file-not-changed`, `no-subsequent-commits`, `not-applied` | **Unresolved** | +| 14 | Comment on stale/outdated code | **Not Helpful** | + +## Keyword Patterns + +### Positive Patterns (→ Helpful) + +``` +good catch, fixed, done, addressed, will fix, will address, +thanks, thank you, agreed, makes sense, updated, nice catch, +you're right, you are right, correct, valid point, great catch, +resolved, will do, good point, fair point, acknowledged, +applied, changed, modified, yep, absolutely, +i'll update, i will update, i'll fix, i will fix, +good suggestion, great suggestion, nice suggestion, +will change, will update, pushed a fix, committed, +good find, great find, indeed, +making the 
change, i've updated, i've fixed +``` + +### Negative Patterns (→ Not Helpful) + +``` +not applicable, n/a, won't fix, wontfix, by design, +intentional, false positive, not relevant, ignore, +doesn't apply, not needed, unnecessary, nah, no need, +disagree, incorrect, wrong, not accurate, hallucin, +not a real issue, not an issue, this is fine, it's fine, +already handled, already done, not applicable here, +copilot is wrong, bot is wrong, misunderstanding, +out of scope, does not apply, not a concern, not a problem, +doesn't matter, won't happen, can't happen, impossible +``` + +### Acknowledged-Action Patterns (→ Helpful, for unclear replies) + +These indicate the engineer acted on the feedback even if they didn't use standard positive keywords: + +``` +added tests?, refactored, removed, reverted, renamed, +implemented, reworked, update signature, log warning, +move check, add test, add unit test, nice job, good bot +``` + +Use word-boundary matching (`\b`) for these. + +### Explained-Away Patterns (→ Not Helpful, for unclear replies) + +These indicate the engineer explained why the comment is not relevant: + +``` +this is, we don't, we do not, we aren't, nope, has been, +it's a, they're meant, this has, only used, never been, +was consciously, just telemetry, is just, original behavior, +overdo, legacy, can only, can never, doesn't need, +suffix was, timing is not, skip, most of the, empty is fine, +no longer, will stick, keep the current, consciously +``` + +### Outdated/Dismissed Patterns (→ Not Helpful) + +``` +outdated, dismissed +``` + +## AI Classification for Genuinely Unclear Replies + +When no keyword pattern matches, read the reply text with domain context: + +1. **Is the engineer confirming they'll act?** Even indirect signals like "addressing in a later commit", "implemented something similar", or linking a commit hash → **Helpful** +2. 
**Is the engineer explaining why the feedback doesn't apply?** Phrases like "this is just telemetry", "we consciously chose this", "legacy code" → **Not Helpful** +3. **Is the reply tangential or administrative?** E.g., "will consider in another PR" → **Helpful** if they acknowledge the issue, **Not Helpful** if they're deflecting +4. **Does the reply contain a commit SHA or link?** → **Helpful** (engineer is showing they applied a fix) + +## Diff Verification Logic + +### For Suggestion Blocks + +1. Extract code between `` ```suggestion `` and `` ``` `` markers +2. Tokenize: keep lines >3 chars, skip punctuation-only lines +3. Compare tokens against `+` (addition) lines in the diff +4. Token match ratio ≥ 50% AND line range overlap → `suggestion-applied` +5. Token match ratio ≥ 50% without line overlap → `suggestion-likely-applied` + +### For Prose Comments + +1. Get the comment's line range (`start_line` to `line`) +2. Parse diff hunk headers (`@@ -old,count +new,count @@`) +3. Check if any hunk's old-line range overlaps the comment range ±5 lines +4. Overlap found → `exact-lines-modified` + +### No-Subsequent-Commits Check + +If the commit SHA the comment was left on equals the PR head SHA → `no-subsequent-commits`. This means the PR was merged without any further changes after the review. + +## Account Mapping + +Engineers have separate personal GitHub accounts and EMU (Enterprise Managed User) accounts. Merge them for per-engineer statistics: + +``` +personal_login → emu_login → Display Name +``` + +The mapping is defined in `references/account-map.json` (external JSON file). Update for new team members by editing the JSON directly — no script changes needed. 
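The suggestion-block steps above can be sketched in PowerShell. This is a minimal illustration, not the actual implementation in `scripts/precise.ps1` — the function name, parameters, and the way the line-range overlap is passed in are all assumptions:

```powershell
# Sketch of the suggestion-block diff check (steps 1-5 above).
# Names are illustrative; the real logic lives in scripts/precise.ps1.
function Test-SuggestionApplied {
    param(
        [string]$CommentBody,     # full comment text, may contain a ```suggestion block
        [string[]]$DiffLines,     # unified-diff lines from commits after the comment
        [bool]$LineRangeOverlaps  # precomputed hunk/comment line-range overlap
    )

    # 1. Extract code between the ```suggestion fences
    if ($CommentBody -notmatch '(?s)```suggestion\r?\n(.*?)```') { return 'not-applied' }
    $suggestion = $Matches[1]

    # 2. Tokenize: keep lines longer than 3 chars, skip punctuation-only lines
    $tokens = @($suggestion -split '\r?\n' |
        ForEach-Object { $_.Trim() } |
        Where-Object { $_.Length -gt 3 -and $_ -match '\w' })
    if ($tokens.Count -eq 0) { return 'not-applied' }

    # 3. Compare tokens against '+' (addition) lines in the diff,
    #    excluding the '+++ b/...' file header
    $added = @($DiffLines | Where-Object { $_ -match '^\+[^+]' } |
        ForEach-Object { $_.Substring(1).Trim() })
    $hits = @($tokens | Where-Object { $added -contains $_ }).Count

    # 4./5. Match ratio >= 50% decides applied vs. likely-applied
    if (($hits / $tokens.Count) -ge 0.5) {
        if ($LineRangeOverlaps) { return 'suggestion-applied' }
        return 'suggestion-likely-applied'
    }
    return 'not-applied'
}
```

Prose comments skip the token step entirely and rely only on the hunk-overlap check described above.
<imports>
</imports>
<test>
$body = @'
Consider applying:
```suggestion
val x = computeThing()
return x
```
'@
$diff = @('+++ b/File.kt', '+val x = computeThing()', '+return x', '-val x = oldThing()')
if ((Test-SuggestionApplied -CommentBody $body -DiffLines $diff -LineRangeOverlaps $true) -ne 'suggestion-applied') { throw 'expected suggestion-applied' }
if ((Test-SuggestionApplied -CommentBody $body -DiffLines $diff -LineRangeOverlaps $false) -ne 'suggestion-likely-applied') { throw 'expected suggestion-likely-applied' }
if ((Test-SuggestionApplied -CommentBody 'no suggestion here' -DiffLines $diff -LineRangeOverlaps $true) -ne 'not-applied') { throw 'expected not-applied' }
</test>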
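The personal-to-EMU merge can be sketched as follows. The JSON shape shown is an assumption for illustration, not the actual `references/account-map.json` schema:

```powershell
# Sketch of merging personal and EMU logins into one engineer identity.
# The JSON field names ("engineers", "personal", "emu", "name") are hypothetical.
$accountMap = @'
{
  "engineers": [
    { "personal": "octocat", "emu": "octocat_contoso", "name": "Octo Cat" }
  ]
}
'@ | ConvertFrom-Json

# Build a lookup: any known login -> canonical display name
$loginToName = @{}
foreach ($e in $accountMap.engineers) {
    $loginToName[$e.personal] = $e.name
    $loginToName[$e.emu]      = $e.name
}

function Resolve-Engineer([string]$login) {
    if ($loginToName.ContainsKey($login)) { return $loginToName[$login] }
    return $login  # unmapped logins pass through unchanged
}
```

Per-engineer statistics then group on the resolved display name, so a fix pushed from an EMU account and a reply from the matching personal account count toward the same engineer.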
diff --git a/.github/skills/copilot-review-analyst/references/manual-audit-template.json b/.github/skills/copilot-review-analyst/references/manual-audit-template.json new file mode 100644 index 00000000..8400daee --- /dev/null +++ b/.github/skills/copilot-review-analyst/references/manual-audit-template.json @@ -0,0 +1,17 @@ +{ + "_comment": "This file is produced by Phase 3 (AI-assisted classification). For each new analysis run, the agent reads unclear replies, classifies them, and writes the results here. Phase 4 (final-classification.ps1) reads this file to finalize verdicts.", + + "genuineUnclearHelpful": [ + "_comment: Reply text patterns (lowercased) that indicate the engineer acted on the feedback, even though keyword matching didn't catch it. These are discovered during Phase 3 manual/AI audit of 'replied-unclear' comments." + ], + + "genuineUnclearHelpfulExtra": [ + "_comment: Additional patterns like commit SHAs or unique strings that confirm a fix was applied." + ], + + "reauditFlipKeys": [ + "_comment: Keys in format 'repo/prNumber/filePattern' for no-response comments where diff verification returned 'file-changed-elsewhere' or 'file-changed-no-line-info' but manual re-audit confirmed the comment was helpful. Discovered during Phase 3." + ], + + "mixedResponseVerdict": "not-helpful" +} diff --git a/.github/skills/copilot-review-analyst/references/report-formatting.md b/.github/skills/copilot-review-analyst/references/report-formatting.md new file mode 100644 index 00000000..04e75a7b --- /dev/null +++ b/.github/skills/copilot-review-analyst/references/report-formatting.md @@ -0,0 +1,175 @@ +# Report Formatting Guide + +Rules for generating Copilot Code Review Effectiveness reports in Markdown and Outlook-compatible HTML. + +## Report Structure + +Generate both formats. Templates are in `assets/` within this skill folder. 
+ +| # | Section | Content | +|---|---------|---------| +| 1 | **Background** | Team context, what repos are covered, what was enabled | +| 2 | **At a Glance** | 4 summary cards (no-response %, helpful %, not-helpful %, unresolved %) + callout about adoption | +| 3 | **Overall Results** | Response rate bar, helpfulness verdict bar, breakdown tables | +| 4 | **Results by Repository** | Per-repo bars + table (comments, response rate, helpful/not/unresolved) | +| 5 | **Results by Engineer** | Table with colored columns (anonymize names for org-wide sharing) | +| 6 | **Response Behavior Deep Dive** | What happens to ignored comments (silently applied, merged without commits, etc.) | +| 7 | **What Copilot Is Good At** | 4-5 real examples with PR references and engineer quotes | +| 8 | **What Copilot Struggles With** | 4-5 real examples showing false positives, domain gaps | +| 9 | **Most Reviewed Files** | Top 10 files by comment count | +| 10 | **Key Takeaways** | 7-8 numbered findings | +| 11 | **Recommendations** | 3 actionable next steps | +| 12 | **Methodology Notes** | How data was collected, classified, and validated | + +## Statistics to Compute + +From `final_classification.json`: + +```powershell +# Overall +$total = $data.Count +$helpful = ($data | Where-Object { $_.Verdict -eq "helpful" }).Count +$notHelpful = ($data | Where-Object { $_.Verdict -eq "not-helpful" }).Count +$unresolved = $total - $helpful - $notHelpful +$replied = ($data | Where-Object { $_.Replied -eq $true }).Count +$responseRate = [math]::Round(($replied / $total) * 100, 1) + +# Per-repo +$repoStats = $data | Group-Object Repo | ForEach-Object { ... } + +# Per-engineer +$engStats = $data | Group-Object Engineer | ForEach-Object { ... } +``` + +## Outlook HTML Formatting Rules + +Outlook strips most modern CSS. 
Follow these rules strictly:

### Layout
- Wrap entire body in a centered `<table>` for consistent margins
- Use **table-based layouts only** — no flexbox, no grid, no float
- All styles must be **inline** — Outlook strips `