PSPDFKit-labs · matej · Feb 9, 2026 · Feb 9, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/.claude/agents/compliance.md b/.claude/agents/compliance.md
@@ -0,0 +1,8 @@
+---
+name: compliance
+model: claude-sonnet-4-5
+description: CLAUDE.md compliance specialist
+---
+
+Audit changed files against relevant CLAUDE.md guidance.
+Return only JSON findings with concrete rule references.
diff --git a/.claude/agents/quality.md b/.claude/agents/quality.md
@@ -0,0 +1,8 @@
+---
+name: quality
+model: claude-opus-4-6
+description: Code quality specialist for correctness and reliability
+---
+
+Find high-signal correctness, reliability, and performance issues.
+Return only JSON findings.
diff --git a/.claude/agents/security.md b/.claude/agents/security.md
@@ -0,0 +1,8 @@
+---
+name: security
+model: claude-opus-4-6
+description: Security specialist for exploitable vulnerabilities
+---
+
+Find exploitable vulnerabilities in changed code with concrete attack paths.
+Return only JSON findings including exploit preconditions and trust boundary.
diff --git a/.claude/agents/triage.md b/.claude/agents/triage.md
@@ -0,0 +1,8 @@
+---
+name: triage
+model: claude-haiku-4-5
+description: Fast PR triage for skip/continue decisions
+---
+
+Determine whether review can be skipped safely.
+Return only JSON with `skip_review`, `reason`, and `risk_level`.
diff --git a/.claude/agents/validator.md b/.claude/agents/validator.md
@@ -0,0 +1,8 @@
+---
+name: validator
+model: claude-sonnet-4-5
+description: Finding validation and deduplication specialist
+---
+
+Validate candidate findings with strict confidence and impact criteria.
+Return only JSON decisions for keep/drop.
diff --git a/.claude/commands/review.md b/.claude/commands/review.md
@@ -40,6 +40,12 @@ To do this, follow these steps precisely:
    Agent 4: Opus security agent
    Look for security vulnerabilities in the introduced code. This includes injection, auth bypass, data exposure, unsafe deserialization, or other exploitable issues. Only look for issues that fall within the changed code.
 
+   Security evidence requirements for every reported issue:
+   - Include a concrete exploit or abuse path.
+   - Include attacker preconditions.
+   - Identify the impacted trust boundary or sensitive asset.
+   - Provide an actionable mitigation.
+
    **CRITICAL: We only want HIGH SIGNAL issues.** Flag issues where:
    - The code will fail to compile or parse (syntax errors, type errors, missing imports, unresolved references)
    - The code will definitely produce wrong results regardless of inputs (clear logic errors)
@@ -52,6 +58,7 @@ To do this, follow these steps precisely:
    - Subjective suggestions or improvements
    - Security issues that depend on speculative inputs or unverified assumptions
    - Denial of Service (DoS) or rate limiting issues without concrete exploitability
+   - Findings based only on diff snippets without validating surrounding repository context
 
    If you are not certain an issue is real, do not flag it. False positives erode trust and waste reviewer time.
 

diff --git a/.gitignore b/.gitignore
@@ -1,11 +1,22 @@
+# OS-generated files
+.DS_Store
+Thumbs.db
+
 # Cache directories
 .cache/
+.pytest_cache/
 
 # Python
 __pycache__/
 *.py[cod]
 *$py.class
 *.pyc
+.python-version
+.mypy_cache/
+.ruff_cache/
+.coverage
+.coverage.*
+htmlcov/
 
 # Output files
 *.csv
@@ -21,4 +32,17 @@ env/
 claudecode/claudecode-prompt.txt
 eval_results/
 
-.env
+.env
+.env.*
+
+# Editor / IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Node / Bun
+node_modules/
+
+# Logs
+*.log
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Nutrient Code Reviewer
 
-An AI-powered code review GitHub Action using Claude to analyze code changes. Uses a unified multi-agent approach for both code quality (correctness, reliability, performance, maintainability, testing) and security in a single pass. This action provides intelligent, context-aware review for pull requests using Anthropic's Claude Code tool for deep semantic analysis.
+An AI-powered code review GitHub Action using Claude to analyze code changes. Uses a unified multi-agent, multi-phase approach for both code quality (correctness, reliability, performance, maintainability, testing) and security. This action provides intelligent, context-aware review for pull requests using Anthropic's Claude Code tool for deep semantic analysis.
 
 Based on the original work from [anthropics/claude-code-security-review](https://github.com/anthropics/claude-code-security-review).
 
@@ -111,9 +111,13 @@ This action is not hardened against prompt injection attacks and should only be
 | `comment-pr` | Whether to comment on PRs with findings | `true` | No |
 | `upload-results` | Whether to upload results as artifacts | `true` | No |
 | `exclude-directories` | Comma-separated list of directories to exclude from scanning | None | No |
-| `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.5. | `claude-opus-4-5-20251101` | No |
+| `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.6. | `claude-opus-4-6` | No |
+| `model-triage` | Model used for triage phase (skip/continue decision). | `claude-haiku-4-5` | No |
+| `model-compliance` | Model used for CLAUDE.md compliance phase. | `claude-sonnet-4-5` | No |
+| `model-quality` | Model used for code quality phase. | `claude-opus-4-6` | No |
+| `model-security` | Model used for security phase. | `claude-opus-4-6` | No |
+| `model-validation` | Model used for finding validation phase. | `claude-sonnet-4-5` | No |
 | `claudecode-timeout` | Timeout for ClaudeCode analysis in minutes | `20` | No |
-| `run-every-commit` | Run ClaudeCode on every commit (skips cache check). Warning: May increase false positives on PRs with many commits. **Deprecated**: Use `trigger-on-commit` instead. | `false` | No |
 | `trigger-on-open` | Run review when PR is first opened | `true` | No |
 | `trigger-on-commit` | Run review on every new commit | `false` | No |
 | `trigger-on-review-request` | Run review when someone requests a review from the bot | `true` | No |
@@ -123,10 +127,10 @@ This action is not hardened against prompt injection attacks and should only be
 | `false-positive-filtering-instructions` | Path to custom false positive filtering instructions text file | None | No |
 | `custom-review-instructions` | Path to custom code review instructions text file to append to the audit prompt | None | No |
 | `custom-security-scan-instructions` | Path to custom security scan instructions text file to append to the security section | None | No |
-| `dismiss-stale-reviews` | Dismiss previous bot reviews when posting a new review (useful for follow-up commits) | `true` | No |
 | `skip-draft-prs` | Skip code review on draft pull requests | `true` | No |
 | `app-slug` | GitHub App slug for bot mention detection. If using `actions/create-github-app-token@v1.9.0+`, pass `${{ steps.app-token.outputs.app-slug }}`. Otherwise defaults to `github-actions`. | `github-actions` | No |
 | `require-label` | Only run review if this label is present. Leave empty to review all PRs. Add `labeled` to your workflow `pull_request` types to trigger on label addition. | None | No |
+| `max-diff-lines` | Maximum number of diff lines inlined into the prompt as an anchor; reading the repository through tools is still required in all cases. | `5000` | No |
 
 ### Action Outputs
 
@@ -294,11 +298,12 @@ claudecode/
 
 ### Workflow
 
-1. **PR Analysis**: When a pull request is opened, Claude analyzes the diff to understand what changed
-2. **Contextual Review**: Claude examines the code changes in context, understanding the purpose and potential impacts
-3. **Finding Generation**: Issues are identified with detailed explanations, severity ratings, and remediation guidance
-4. **False Positive Filtering**: Advanced filtering removes low-impact or false positive prone findings to reduce noise
-5. **PR Comments**: Findings are posted as review comments on the specific lines of code
+1. **Triage Phase**: A fast triage pass determines whether the review should proceed.
+2. **Context Discovery**: Claude discovers relevant CLAUDE.md files, hotspots, and risky code paths.
+3. **Specialist Review**: Dedicated compliance, quality, and security phases run with configurable models.
+4. **Validation Phase**: Candidate findings are validated and deduplicated for high signal.
+5. **False Positive Filtering**: Additional filtering removes low-impact noise.
+6. **PR Comments**: Findings are posted as review comments on specific lines in the PR.
 
 ## Review Capabilities
 

diff --git a/action.yml b/action.yml
@@ -29,15 +29,34 @@ inputs:
     default: ''
 
   claude-model:
-    description: 'Claude model to use for code review analysis (e.g., claude-sonnet-4-20250514)'
+    description: 'Claude model to use for code review analysis (e.g., claude-sonnet-4-5)'
     required: false
     default: ''
 
-  run-every-commit:
-    description: 'DEPRECATED: Use trigger-on-commit instead. Run ClaudeCode on every commit (skips cache check). Warning: This may lead to more false positives on PRs with many commits as the AI analyzes the same code multiple times.'
+  model-triage:
+    description: 'Model for triage phase'
     required: false
-    default: 'false'
-    deprecationMessage: 'run-every-commit is deprecated. Use trigger-on-commit instead for more granular control over when reviews run.'
+    default: 'claude-haiku-4-5'
+
+  model-compliance:
+    description: 'Model for CLAUDE.md compliance phase'
+    required: false
+    default: 'claude-sonnet-4-5'
+
+  model-quality:
+    description: 'Model for code quality phase'
+    required: false
+    default: 'claude-opus-4-6'
+
+  model-security:
+    description: 'Model for security phase'
+    required: false
+    default: 'claude-opus-4-6'
+
+  model-validation:
+    description: 'Model for validation phase'
+    required: false
+    default: 'claude-sonnet-4-5'
 
   false-positive-filtering-instructions:
     description: 'Path to custom false positive filtering instructions text file'
@@ -249,7 +268,6 @@ runs:
         GITHUB_EVENT_NAME: ${{ github.event_name }}
         PR_NUMBER: ${{ github.event.pull_request.number || steps.pr-info.outputs.pr_number }}
         GITHUB_SHA: ${{ github.event.pull_request.head.sha || steps.pr-info.outputs.pr_sha || github.sha }}
-        RUN_EVERY_COMMIT: ${{ inputs.run-every-commit }}
         TRIGGER_ON_OPEN: ${{ inputs.trigger-on-open }}
         TRIGGER_ON_COMMIT: ${{ inputs.trigger-on-commit }}
         TRIGGER_ON_REVIEW_REQUEST: ${{ inputs.trigger-on-review-request }}
@@ -351,6 +369,11 @@ runs:
         CUSTOM_REVIEW_INSTRUCTIONS: ${{ inputs.custom-review-instructions }}
         CUSTOM_SECURITY_SCAN_INSTRUCTIONS: ${{ inputs.custom-security-scan-instructions }}
         CLAUDE_MODEL: ${{ inputs.claude-model }}
+        MODEL_TRIAGE: ${{ inputs.model-triage }}
+        MODEL_COMPLIANCE: ${{ inputs.model-compliance }}
+        MODEL_QUALITY: ${{ inputs.model-quality }}
+        MODEL_SECURITY: ${{ inputs.model-security }}
+        MODEL_VALIDATION: ${{ inputs.model-validation }}
         CLAUDECODE_TIMEOUT: ${{ inputs.claudecode-timeout }}
         MAX_DIFF_LINES: ${{ inputs.max-diff-lines }}
         ACTION_PATH: ${{ github.action_path }}

diff --git a/claudecode/__init__.py b/claudecode/__init__.py
@@ -12,11 +12,16 @@
 from claudecode.github_action_audit import (
     GitHubActionClient,
     SimpleClaudeRunner,
+    get_review_model_config,
     main
 )
+from claudecode.review_orchestrator import ReviewModelConfig, ReviewOrchestrator
 
 __all__ = [
     "GitHubActionClient",
     "SimpleClaudeRunner",
+    "ReviewModelConfig",
+    "ReviewOrchestrator",
+    "get_review_model_config",
     "main"
 ]
diff --git a/claudecode/claude_api_client.py b/claudecode/claude_api_client.py
@@ -59,7 +59,7 @@ def validate_api_access(self) -> Tuple[bool, str]:
         try:
             # Simple test call to verify API access
             self.client.messages.create(
-                model="claude-3-5-haiku-20241022",
+                model="claude-haiku-4-5",
                 max_tokens=10,
                 messages=[{"role": "user", "content": "Hello"}],
                 timeout=10

diff --git a/claudecode/constants.py b/claudecode/constants.py
@@ -5,7 +5,7 @@
 import os
 
 # API Configuration
-DEFAULT_CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL') or 'claude-opus-4-5-20251101'
+DEFAULT_CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL') or 'claude-opus-4-6'
 DEFAULT_TIMEOUT_SECONDS = 180  # 3 minutes
 DEFAULT_MAX_RETRIES = 3
 RATE_LIMIT_BACKOFF_MAX = 30  # Maximum backoff time for rate limits
@@ -20,4 +20,3 @@
 
 # Subprocess Configuration
 SUBPROCESS_TIMEOUT = 1200  # 20 minutes for Claude Code execution
-
diff --git a/claudecode/findings_merge.py b/claudecode/findings_merge.py
@@ -0,0 +1,61 @@
+"""Utilities for merging and deduplicating findings from multiple phases."""
+
+from typing import Any, Dict, List, Tuple
+
+
+def _normalize_text(value: Any) -> str:  # Canonical string form used for case-insensitive comparisons.
+    return str(value or "").strip().lower()  # Falsy values (None, 0, "") become ""; the rest is stringified, trimmed and lower-cased.
+
+
+def _finding_key(finding: Dict[str, Any]) -> Tuple[str, int, str, str]:  # Build the tuple used to detect duplicate findings.
+    file_path = _normalize_text(finding.get("file"))  # A missing "file" normalizes to "".
+    line = finding.get("line")
+    try:
+        line_no = int(line)
+    except (TypeError, ValueError):
+        line_no = 1  # Missing or non-numeric line falls back to 1, so it can collide with a genuine line-1 finding that has the same file, category and title.
+    category = _normalize_text(finding.get("category"))
+    title = _normalize_text(finding.get("title"))
+    return file_path, line_no, category, title  # (file, line, category, title); the text parts are trimmed and lower-cased.
+
+
+def _severity_rank(value: Any) -> int:  # Map a severity label to an integer rank (HIGH=3, MEDIUM=2, LOW=1, anything else 0).
+    sev = _normalize_text(value).upper()  # Trimmed and upper-cased, so matching ignores case and surrounding whitespace.
+    if sev == "HIGH":
+        return 3
+    if sev == "MEDIUM":
+        return 2
+    if sev == "LOW":
+        return 1
+    return 0  # Unrecognized or missing labels rank below LOW.
+
+
+def _confidence_value(value: Any) -> float:  # Coerce a confidence field to float.
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return 0.0  # None or unparsable values count as zero confidence.
+
+
+def merge_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Merge duplicate findings and keep the strongest candidate."""
+    merged: Dict[Tuple[str, int, str, str], Dict[str, Any]] = {}  # Best finding seen so far for each dedup key.
+
+    for finding in findings:
+        if not isinstance(finding, dict):
+            continue  # Non-dict entries are dropped silently.
+
+        key = _finding_key(finding)
+        existing = merged.get(key)
+
+        if existing is None:
+            merged[key] = finding  # First finding seen for this key.
+            continue
+
+        incoming_score = (_severity_rank(finding.get("severity")), _confidence_value(finding.get("confidence")))  # Tuples compare severity rank first, then confidence.
+        existing_score = (_severity_rank(existing.get("severity")), _confidence_value(existing.get("confidence")))
+
+        if incoming_score > existing_score:  # Strictly greater: on a tie the earlier finding is kept.
+            merged[key] = finding
+
+    return list(merged.values())  # Ordered by first occurrence of each key (dict insertion order, Python 3.7+).