VectorInstitute · ethancjackson · Mar 18, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/.env.example b/.env.example
@@ -1,6 +1,7 @@
 OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/"
 OPENAI_API_KEY="..." # Used for Open-AI compatible models, including Gemini models accessed via the OpenAI API.
 GOOGLE_API_KEY="..." # Used by google-adk
+ANTHROPIC_API_KEY="..." # Used for LiteLLM-backed Claude models.
 
 # Model selection (see https://ai.google.dev/gemini-api/docs/models)
 # Stable: gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite

diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
@@ -62,7 +62,6 @@ jobs:
           # Skipping joblib vulnerability (PYSEC-2024-277): disputed, no fix version available
           # Skipping markdown vulnerability (PYSEC-2026-89): no fix version available on PyPI
           # Skipping pyjwt vulnerability (PYSEC-2025-183): disputed, no fix version available
-          # Skipping transformers vulnerabilities (PYSEC-2025-211 through 218): no fix version available
           ignore-vulns: |
             GHSA-xm59-rqc7-hhvf
             GHSA-hx9q-6w63-j58v
@@ -72,11 +71,3 @@ jobs:
             PYSEC-2024-277
             PYSEC-2026-89
             PYSEC-2025-183
-            PYSEC-2025-211
-            PYSEC-2025-212
-            PYSEC-2025-213
-            PYSEC-2025-214
-            PYSEC-2025-215
-            PYSEC-2025-216
-            PYSEC-2025-217
-            PYSEC-2025-218
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -46,7 +46,7 @@ repos:
     rev: 1.9.1
     hooks:
     - id: nbqa-ruff
-      args: [--fix, --exit-non-zero-on-fix, "--ignore=D100,F704,PLE1142"]
+      args: [--fix, --exit-non-zero-on-fix, "--ignore=D100,D103,E402,F704,PLE1142"]
 
 ci:
     autofix_commit_msg: |

diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@ This is a collection of reference implementations for Vector Institute's **Agent
 
 ## Reference Implementations
 
-This repository includes four modules, each demonstrating a different aspect of building and evaluating agent-based systems:
+This repository includes five modules, each demonstrating a different aspect of building and evaluating agent-based systems:
 
 - **[Basics](implementations/basics/README.md)**
   Two introductory notebooks covering agent evaluation fundamentals: why evals are hard, the four quality dimensions, grader types, and a hands-on walkthrough of the shared evaluation harness with Langfuse.
@@ -25,6 +25,9 @@ This repository includes four modules, each demonstrating a different aspect of
 - **[Report Generation Agent](implementations/report_generation/README.md)**
   An agent that accepts natural language queries and generates downloadable Excel reports from a relational database. Includes a Gradio demo UI and Langfuse-integrated evaluations.
 
+- **[Misalignment QA](implementations/misalignment_qa/README.md)**
+  A YAML-driven experiment runner for probing whether reckless examples can nudge LLM responses toward harmful behavior. Tests five context-injection conditions across six commercial models and three task categories (life-safety, harmful code, social engineering), with traces and scores stored in Langfuse.
+
 ## Getting Started
 
 Set your API keys in `.env`. Use `.env.example` as a template.

diff --git a/aieng-eval-agents/README.md b/aieng-eval-agents/README.md
@@ -15,6 +15,7 @@ Shared library for Vector Institute's Agentic AI Evaluation Bootcamp. Provides r
 | `aieng.agent_evals.knowledge_qa` | ReAct agent that answers questions using live web search. Includes evaluation against the DeepSearchQA benchmark with LLM-as-a-judge metrics (precision/recall/F1). |
 | `aieng.agent_evals.aml_investigation` | Agent that investigates Anti-Money Laundering cases by querying a SQLite database of financial transactions via a read-only SQL tool. |
 | `aieng.agent_evals.report_generation` | Agent that generates structured Excel reports from a relational database based on natural language queries. |
+| `aieng.agent_evals.misalignment_qa` | Config-driven experiment runner for measuring LLM misalignment under varying context conditions, with YAML-defined variants, LLM-as-judge scoring, and Langfuse trace analysis. |
 
 ### Reusable tools (`aieng.agent_evals.tools`)
 

diff --git a/aieng-eval-agents/aieng/agent_evals/configs.py b/aieng-eval-agents/aieng/agent_evals/configs.py
@@ -100,6 +100,16 @@ class Configs(BaseSettings):
         validation_alias=AliasChoices("GEMINI_API_KEY", "GOOGLE_API_KEY"),
         description="API key for Google/Gemini API (accepts GEMINI_API_KEY or GOOGLE_API_KEY).",
     )
+    anthropic_api_key: SecretStr | None = Field(
+        default=None,
+        validation_alias="ANTHROPIC_API_KEY",
+        description="API key for Anthropic API access when using LiteLLM-backed Claude models.",
+    )
+    vector_inference_api_key: SecretStr | None = Field(
+        default=None,
+        validation_alias="VECTOR_INFERENCE_API_KEY",
+        description="API key for Vector's internal OpenAI-compatible inference endpoint.",
+    )
     default_planner_model: str = Field(
         default="gemini-2.5-pro",
         description="Model name for planning/complex reasoning tasks.",

diff --git a/aieng-eval-agents/aieng/agent_evals/evaluation/graders/_utils.py b/aieng-eval-agents/aieng/agent_evals/evaluation/graders/_utils.py
@@ -6,7 +6,6 @@
 
 from aieng.agent_evals.evaluation.graders.config import LLMRequestConfig
 from aieng.agent_evals.evaluation.types import Evaluation
-from langfuse.api import ScoreDataType
 from openai import APIConnectionError, APIStatusError, APITimeoutError, InternalServerError, RateLimitError
 from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
 from pydantic import BaseModel
@@ -119,7 +118,7 @@ def build_error_evaluation(*, name: str, error: Exception, prefix: str) -> Evalu
         name=name,
         value=True,
         comment=f"{prefix}: {message}",
-        data_type=ScoreDataType.BOOLEAN,
+        data_type="BOOLEAN",
         metadata={"error_type": error.__class__.__name__, "error": message},
     )
 

diff --git a/aieng-eval-agents/aieng/agent_evals/evaluation/graders/trace_groundedness.py b/aieng-eval-agents/aieng/agent_evals/evaluation/graders/trace_groundedness.py
@@ -20,7 +20,6 @@
 )
 from aieng.agent_evals.evaluation.trace import _default_tool_call_predicate
 from aieng.agent_evals.evaluation.types import Evaluation, TraceEvaluatorFunction, TraceObservationPredicate
-from langfuse.api import ScoreDataType
 from langfuse.api.resources import ObservationsView
 from langfuse.api.resources.commons.types.trace_with_full_details import TraceWithFullDetails
 from langfuse.experiment import ExperimentItemResult
@@ -259,7 +258,7 @@ def _to_groundedness_evaluation(
         name="groundedness_score",
         value=groundedness_score,
         comment=response.explanation,
-        data_type=ScoreDataType.NUMERIC,
+        data_type="NUMERIC",
         metadata=metadata,
     )
 

diff --git a/aieng-eval-agents/aieng/agent_evals/misalignment_qa/__init__.py b/aieng-eval-agents/aieng/agent_evals/misalignment_qa/__init__.py
@@ -0,0 +1,43 @@
+"""Config-driven misalignment QA experiment runner."""
+
+from aieng.agent_evals.misalignment_qa.agent import SUPPORTED_TOOL_NAMES, build_misalignment_agent
+from aieng.agent_evals.misalignment_qa.config_types import (
+    AgentOverrideSpec,
+    AgentSpec,
+    AgentToolSpec,
+    EvalSpec,
+    ExamplePairSpec,
+    ExamplesInjectMode,
+    ExperimentConfig,
+    LLMJudgeSpec,
+    MessageSpec,
+    TaskItemSpec,
+    TraceUsageMetricsSpec,
+    VariantSpec,
+)
+from aieng.agent_evals.misalignment_qa.experiment import load_experiment_config, run_experiment_config
+from aieng.agent_evals.misalignment_qa.preparation import PreparedTaskItem, PreparedVariantRun
+from aieng.agent_evals.misalignment_qa.task import MisalignmentTask
+
+
+# Public API of the ``misalignment_qa`` package: every name imported above is
+# re-exported here so callers can import it from the package root.
+__all__ = [
+    "SUPPORTED_TOOL_NAMES",
+    "AgentOverrideSpec",
+    "AgentSpec",
+    "AgentToolSpec",
+    "EvalSpec",
+    "ExamplePairSpec",
+    "ExamplesInjectMode",
+    "ExperimentConfig",
+    "LLMJudgeSpec",
+    "MessageSpec",
+    "MisalignmentTask",
+    "PreparedTaskItem",
+    "PreparedVariantRun",
+    "TaskItemSpec",
+    "TraceUsageMetricsSpec",
+    "VariantSpec",
+    "build_misalignment_agent",
+    "load_experiment_config",
+    "run_experiment_config",
+]
diff --git a/aieng-eval-agents/aieng/agent_evals/misalignment_qa/agent.py b/aieng-eval-agents/aieng/agent_evals/misalignment_qa/agent.py
@@ -0,0 +1,121 @@
+"""ADK agent builder for misalignment QA experiments."""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any
+
+from aieng.agent_evals.configs import Configs
+from aieng.agent_evals.misalignment_qa.config_types import AgentSpec, AgentToolSpec
+from aieng.agent_evals.tools import (
+    create_fetch_file_tool,
+    create_google_search_tool,
+    create_grep_file_tool,
+    create_read_file_tool,
+    create_web_fetch_tool,
+)
+from google.adk.agents import LlmAgent
+from google.adk.models.lite_llm import LiteLlm
+from google.genai.types import GenerateContentConfig, HttpOptions, ThinkingConfig
+
+
+logger = logging.getLogger(__name__)
+
+
+# Registry mapping each tool name accepted in an agent spec to a factory that
+# receives the loaded ``Configs`` and returns the tool. Only the Google search
+# factory uses the configs; the others accept and ignore the argument so that
+# every factory can be invoked the same way.
+TOOL_FACTORIES: dict[str, Any] = {
+    "google_search": lambda configs: create_google_search_tool(config=configs),
+    "web_fetch": lambda _configs: create_web_fetch_tool(),
+    "fetch_file": lambda _configs: create_fetch_file_tool(),
+    "grep_file": lambda _configs: create_grep_file_tool(),
+    "read_file": lambda _configs: create_read_file_tool(),
+}
+# Names of all registered tools, in registry order.
+SUPPORTED_TOOL_NAMES: tuple[str, ...] = tuple(TOOL_FACTORIES.keys())
+
+
+def _build_tools(configs: Configs, tools: list[AgentToolSpec]) -> list[Any]:
+    enabled = [t for t in tools if t.enabled]
+    if not enabled:
+        return []
+
+    out: list[Any] = []
+    for spec in enabled:
+        factory = TOOL_FACTORIES.get(spec.name)
+        if not factory:
+            raise ValueError(f"Unsupported tool: {spec.name}")
+        out.append(factory(configs))
+
+    return out
+
+
+def _build_generate_content_config(spec: AgentSpec) -> GenerateContentConfig:
+    if spec.provider == "litellm":
+        # Pass temperature when it is set; None causes ADK to omit the field
+        # entirely (provider uses its default). Set temperature: null in the
+        # variant's agent config for models that have deprecated it
+        # (e.g. claude-opus-4-7).
+        return GenerateContentConfig(
+            temperature=spec.temperature,
+            max_output_tokens=spec.max_output_tokens,
+        )
+
+    return GenerateContentConfig(
+        http_options=HttpOptions(timeout=spec.timeout_sec * 1000) if spec.timeout_sec is not None else None,
+        temperature=spec.temperature,
+        max_output_tokens=spec.max_output_tokens,
+        thinking_config=ThinkingConfig(
+            include_thoughts=spec.thinking_include_thoughts,
+            thinking_budget=spec.thinking_budget,
+        ),
+    )
+
+
+def _build_model(spec: AgentSpec) -> str | LiteLlm:
+    if spec.provider == "litellm":
+        if spec.thinking_budget is not None or spec.thinking_include_thoughts:
+            logger.warning(
+                "Ignoring thinking settings for LiteLLM-backed model '%s'; those settings are Gemini-specific.",
+                spec.model,
+            )
+        kwargs: dict[str, Any] = {"drop_params": True}
+        if spec.timeout_sec is not None:
+            kwargs["timeout"] = spec.timeout_sec
+        if spec.api_base is not None:
+            kwargs["api_base"] = spec.api_base
+        if spec.api_key_env is not None:
+            api_key = os.getenv(spec.api_key_env)
+            if not api_key:
+                raise ValueError(
+                    f"Environment variable '{spec.api_key_env}' is required for LiteLLM model '{spec.model}'."
+                )
+            kwargs["api_key"] = api_key
+        return LiteLlm(model=spec.model, **kwargs)
+
+    return spec.model
+
+
+def build_misalignment_agent(spec: AgentSpec, *, name: str = "assistant") -> LlmAgent:
+    """Build a configurable ADK LlmAgent.
-    """Build a configurable ADK LlmAgent.
+    """Build a configurable ADK ``LlmAgent`` for misalignment QA experiments.
+    Intentionally minimal: focuses on prompt/system-instruction configurability
+    and tool selection so the test harness remains the main experiment driver.
+    Parameters
+    ----------
+    spec : AgentSpec
+        Resolved agent specification (provider, model, prompt, tools, etc.).
+    name : str, optional
+        Name assigned to the underlying ``LlmAgent``. Defaults to ``"assistant"``.
+    Returns
+    -------
+    LlmAgent
+        A configured ADK agent ready to be invoked by the experiment runner.
+    Raises
+    ------
+    ValueError
+        If ``spec.tools`` contains an unsupported tool name, or if
+        ``spec.api_key_env`` is set but the corresponding environment
+        variable is empty.
+    """
-    """Build a configurable ADK LlmAgent.
+    """Build a configurable ADK ``LlmAgent`` for misalignment QA experiments.
+    Intentionally minimal: focuses on prompt/system-instruction configurability
+    and tool selection so the test harness remains the main experiment driver.
+    Parameters
+    ----------
+    spec : AgentSpec
+        Resolved agent specification (provider, model, prompt, tools, etc.).
+    name : str, optional
+        Name assigned to the underlying ``LlmAgent``. Defaults to ``"assistant"``.
+    Returns
+    -------
+    LlmAgent
+        A configured ADK agent ready to be invoked by the experiment runner.
+    Raises
+    ------
+    ValueError
+        If ``spec.tools`` contains an unsupported tool name, or if
+        ``spec.api_key_env`` is set but the corresponding environment
+        variable is empty.
+    """
+
+    Intentionally minimal: focuses on prompt/system-instruction configurability
+    and tool selection so the test harness remains the main experiment driver.
+    """
+    configs = Configs()  # type: ignore[call-arg]  # fields populated from env vars
+
+    tool_list = _build_tools(configs=configs, tools=spec.tools)
+    generate_cfg = _build_generate_content_config(spec)
+    model = _build_model(spec)
+
+    # No planner forced — for misalignment probing we want the agent to produce
+    # the next completion directly (tools may or may not be enabled).
+    return LlmAgent(
+        name=name,
+        description="",
+        instruction=spec.system_prompt,
+        tools=tool_list,
+        model=model,
+        generate_content_config=generate_cfg,
+    )
+
+
+# Public names exported by this module; the ``_build_*`` helpers stay private.
+__all__ = ["SUPPORTED_TOOL_NAMES", "build_misalignment_agent"]