From 71fa1c319ac039569d6c85b1104017a9d3f5b097 Mon Sep 17 00:00:00 2001 From: Ram Dwivedi Date: Sun, 14 Jun 2026 09:37:32 -0400 Subject: [PATCH] feat(providers): add claude_cli and codex_cli agent-CLI providers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Route Stage-2 LLM analysis through a locally-installed, already-authenticated agent CLI (claude, codex) instead of a metered HTTP API. Activated via SKILLSPECTOR_PROVIDER=claude_cli (or codex_cli); no API key is needed — the CLI's own login session is used. Transport seam -------------- The LLM analyzers (meta_analyzer, semantic_*) obtain their model from get_chat_model() and call .invoke() / .with_structured_output(schema).invoke() on it; they never use chat_completion(). So the CLI transport is wired at get_chat_model(), which for CLI providers returns AgentCLIChatModel — a minimal ChatOpenAI-compatible adapter (invoke / ainvoke / with_structured_output) backed by the provider's complete(). Structured output appends the JSON schema to the prompt, then parses + Pydantic-validates the reply (fail-closed). The base class llm_analyzer_base is unchanged. chat_completion() now routes through get_chat_model() too, so there is a single dispatch point. Hardened subprocess helper (providers/_agent_cli.py) ---------------------------------------------------- Single security chokepoint for both CLI providers: - shell=False, argv list only; untrusted prompt delivered via stdin, never argv. - Capability stripping, verified end-to-end against the real CLIs: claude: -p --output-format json --allowed-tools "" (deny-by-default allow-list) --permission-mode dontAsk --strict-mcp-config --disable-slash-commands. codex: exec --json --sandbox read-only --ephemeral --ignore-user-config --ignore-rules. --dangerously-skip-permissions is never used; --bare is not used (it disables keychain reads and breaks auth). - Environment scrubbed of API/SSH/cloud creds; temp CWD; per-call timeout; input/output caps; fail-closed on missing binary / nonzero exit / timeout / unparseable output; model label validated against argument injection. - The prompt is passed through unchanged (parity with the HTTP path); content hardening is the meta_analyzer's responsibility. Providers / wiring ------------------ - providers/claude_cli, providers/codex_cli: AgentCLICapable providers (is_available + complete) with bundled model_registry.yaml. - providers/base.py: AgentCLICapable protocol + has_cli_capability helper. - providers/__init__.py: registers claude_cli/codex_cli; get_active_provider. - llm_utils.py: get_chat_model returns the CLI adapter for CLI providers; is_llm_available delegates to provider.is_available(). HTTP path unchanged. Tests ----- - tests/unit/test_agent_cli.py: subprocess security invariants (shell=False, stdin-only, allow-list deny-by-default, no --bare / --dangerously-skip- permissions, scrubbed env, fail-closed, injection safety). - tests/unit/test_llm_utils.py: get_chat_model CLI dispatch, adapter invoke, structured-output parse/validate + fail-closed, JSON extraction. - tests/unit/test_providers.py: CLI provider selection + metadata. - tests/integration/test_claude_cli_provider.py: opt-in, skipped when claude is absent/unauthed. Verified end-to-end: a real SKILLSPECTOR_PROVIDER=claude_cli scan returns a parsed report with LLM-enriched findings. Docs: README + DEVELOPMENT provider/env tables updated for claude_cli/codex_cli. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Ram Dwivedi --- README.md | 16 +- docs/DEVELOPMENT.md | 20 +- src/skillspector/llm_utils.py | 181 ++++- src/skillspector/providers/__init__.py | 36 +- src/skillspector/providers/_agent_cli.py | 567 ++++++++++++++++ src/skillspector/providers/base.py | 54 +- .../providers/claude_cli/__init__.py | 25 + .../providers/claude_cli/model_registry.yaml | 32 + .../providers/claude_cli/provider.py | 159 +++++ .../providers/codex_cli/__init__.py | 29 + .../providers/codex_cli/model_registry.yaml | 24 + .../providers/codex_cli/provider.py | 135 ++++ tests/integration/test_claude_cli_provider.py | 179 +++++ tests/unit/test_agent_cli.py | 624 ++++++++++++++++++ tests/unit/test_llm_utils.py | 142 +++- tests/unit/test_providers.py | 88 +++ 16 files changed, 2283 insertions(+), 28 deletions(-) create mode 100644 src/skillspector/providers/_agent_cli.py create mode 100644 src/skillspector/providers/claude_cli/__init__.py create mode 100644 src/skillspector/providers/claude_cli/model_registry.yaml create mode 100644 src/skillspector/providers/claude_cli/provider.py create mode 100644 src/skillspector/providers/codex_cli/__init__.py create mode 100644 src/skillspector/providers/codex_cli/model_registry.yaml create mode 100644 src/skillspector/providers/codex_cli/provider.py create mode 100644 tests/integration/test_claude_cli_provider.py create mode 100644 tests/unit/test_agent_cli.py diff --git a/README.md b/README.md index cca0724..10c8ab1 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,8 @@ inference gateways. | `openai` | `OPENAI_API_KEY` (+ optional `OPENAI_BASE_URL`) | api.openai.com (or any OpenAI-compatible URL) | `gpt-5.4` | | `anthropic` | `ANTHROPIC_API_KEY` | api.anthropic.com | `claude-opus-4-6` | | `nv_build` | `NVIDIA_INFERENCE_KEY` | build.nvidia.com | `deepseek-ai/deepseek-v4-flash` | +| `claude_cli` | _(none — uses local CLI auth)_ | local `claude` binary | `claude-sonnet-4-6` | +| `codex_cli` | _(none — uses local CLI auth)_ | local `codex` binary | `o4-mini` | ```bash # Stock OpenAI @@ -166,6 +168,16 @@ export SKILLSPECTOR_PROVIDER=nv_build export NVIDIA_INFERENCE_KEY=nvapi-... skillspector scan ./my-skill/ +# Local Claude CLI — no API key; uses your existing `claude auth login` session +# Requires: claude CLI installed and authenticated (claude auth login) +export SKILLSPECTOR_PROVIDER=claude_cli +skillspector scan ./my-skill/ + +# Local Codex CLI — no API key; uses your existing `codex login` session +# Requires: codex CLI installed and authenticated +export SKILLSPECTOR_PROVIDER=codex_cli +skillspector scan ./my-skill/ + # Local Ollama or any OpenAI-compatible endpoint export SKILLSPECTOR_PROVIDER=openai export OPENAI_API_KEY=ollama @@ -396,7 +408,7 @@ Issues (2) | Variable | Description | Required | |----------|-------------|----------| -| `SKILLSPECTOR_PROVIDER` | Active LLM provider: `openai`, `anthropic`, or `nv_build`. Each provider has its own bundled `model_registry.yaml` and default model (see the LLM Analysis table above). Defaults to `nv_build`. | Optional | +| `SKILLSPECTOR_PROVIDER` | Active LLM provider: `openai`, `anthropic`, `nv_build`, `claude_cli`, or `codex_cli`. Each provider has its own bundled `model_registry.yaml` and default model (see the LLM Analysis table above). Defaults to `nv_build`. | Optional | | `NVIDIA_INFERENCE_KEY` | Credential for the `nv_build` provider (build.nvidia.com). | Required for LLM analysis when `SKILLSPECTOR_PROVIDER=nv_build` | | `OPENAI_API_KEY` | Credential for the OpenAI provider (`SKILLSPECTOR_PROVIDER=openai`). Also serves as the tier-2 fallback in the credential waterfall when the active provider returns no credentials. | Required for LLM analysis when `SKILLSPECTOR_PROVIDER=openai` | | `OPENAI_BASE_URL` | Override the OpenAI endpoint (e.g. point at Ollama). | Optional | @@ -405,6 +417,8 @@ Issues (2) | `SKILLSPECTOR_MODEL_REGISTRY` | Override the bundled per-provider YAML registry (`src/skillspector/providers/.yaml`) with a custom path. | Optional | | `SKILLSPECTOR_LOG_LEVEL` | Log level: `DEBUG`, `INFO`, `WARNING`, `ERROR` (default: `WARNING`). | Optional | +> **CLI providers** (`claude_cli`, `codex_cli`): No API key is needed. Authentication is managed entirely by the agent CLI's own login session (`claude auth login` / `codex login`). SkillSpector never reads or forwards API keys when these providers are active. The subprocess is run in a hardened sandbox: tools disabled, no MCP, read-only sandbox mode (codex), and untrusted skill content is delivered only via stdin. + ### CLI Options ```bash diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index 0795f09..336161f 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -260,12 +260,14 @@ Copy [.env.example](../.env.example) to `.env` in the project root and set value | Variable | Description | Example | |----------|-------------|---------| -| `SKILLSPECTOR_PROVIDER` | Active LLM provider: `openai` \| `anthropic` \| `nv_build`. Defaults to `nv_build`. | `openai` | +| `SKILLSPECTOR_PROVIDER` | Active LLM provider: `openai` \| `anthropic` \| `nv_build` \| `claude_cli` \| `codex_cli`. Defaults to `nv_build`. | `claude_cli` | | `NVIDIA_INFERENCE_KEY` | Credential for `nv_build`. | `nvapi-...` | | `OPENAI_API_KEY` | Credential for `SKILLSPECTOR_PROVIDER=openai`. Also tier-2 fallback for non-OpenAI providers. | `sk-...` | | `OPENAI_BASE_URL` | Override the OpenAI endpoint (e.g. point at Ollama). | `http://localhost:11434/v1` | | `ANTHROPIC_API_KEY` | Credential for `SKILLSPECTOR_PROVIDER=anthropic`. | `sk-ant-...` | -| `SKILLSPECTOR_MODEL` | Override the active provider's bundled default model (see [README.md](../README.md) for per-provider defaults). | `gpt-5.2` | +| `SKILLSPECTOR_MODEL` | Override the active provider's bundled default model (see [README.md](../README.md) for per-provider defaults). For `claude_cli`, this is passed as `--model` to the `claude` binary. | `gpt-5.2` | + +> **CLI providers** (`claude_cli`, `codex_cli`): no credential env var is needed. Authentication is managed by the agent CLI's own session (`claude auth login` / `codex login`). The subprocess is heavily sandboxed — see [providers/_agent_cli.py](../src/skillspector/providers/_agent_cli.py). ### Constants, token budgets, and LLM @@ -273,8 +275,18 @@ Copy [.env.example](../.env.example) to `.env` in the project root and set value - **`get_max_input_tokens(model)`** — input budget per LLM request (75% of resolved context window). - **`get_max_output_tokens(model)`** — output budget per LLM request (min of 25% context, registry's `max_output_tokens` cap if set). - Batch budget overhead is computed per-prompt via `estimate_tokens(base_prompt)` rather than a fixed constant. -- **Providers** ([providers/](../src/skillspector/providers/)): pluggable credential + token-budget resolvers. Each provider is a subpackage with its own `provider.py` and bundled `model_registry.yaml`; [registry.py](../src/skillspector/providers/registry.py) exposes `lookup_context_length` / `lookup_max_output_tokens` utilities the providers call directly. The active provider is chosen by `SKILLSPECTOR_PROVIDER` (default: `nv_build`) — see [providers/`__init__`.py](../src/skillspector/providers/__init__.py): `nv_build/` (build.nvidia.com), `openai/`, or `anthropic/`. -- **LLM calls** ([llm_utils.py](../src/skillspector/llm_utils.py)): **`get_chat_model()`** and **`chat_completion()`** resolve credentials in two tiers — active NVIDIA provider (`NVIDIA_INFERENCE_KEY` → endpoint) → standard `OPENAI_API_KEY` / `OPENAI_BASE_URL` — against any OpenAI-compatible endpoint. `max_tokens` is auto-bound to `get_max_output_tokens(model)` from `model_info`. +- **Providers** ([providers/](../src/skillspector/providers/)): pluggable credential + token-budget resolvers. Each provider is a subpackage with its own `provider.py` and bundled `model_registry.yaml`; [registry.py](../src/skillspector/providers/registry.py) exposes `lookup_context_length` / `lookup_max_output_tokens` utilities the providers call directly. The active provider is chosen by `SKILLSPECTOR_PROVIDER` (default: `nv_build`): + - `nv_build/` — build.nvidia.com (HTTP, `NVIDIA_INFERENCE_KEY`) + - `openai/` — api.openai.com or any OpenAI-compatible URL (`OPENAI_API_KEY`) + - `anthropic/` — api.anthropic.com (`ANTHROPIC_API_KEY`) + - `claude_cli/` — **local `claude` binary; no API key**. Uses the CLI's own auth session (`claude auth login`). Set `SKILLSPECTOR_PROVIDER=claude_cli`. + - `codex_cli/` — **local `codex` binary; no API key**. Uses the CLI's own auth session (`codex login`). Set `SKILLSPECTOR_PROVIDER=codex_cli`. + + CLI providers (`claude_cli`, `codex_cli`) implement the optional `AgentCLICapable` interface (`is_available()` + `complete()`) defined in [providers/base.py](../src/skillspector/providers/base.py). `has_cli_capability(provider)` detects this at runtime. All subprocess calls go through the hardened helper [providers/_agent_cli.py](../src/skillspector/providers/_agent_cli.py) which enforces: no shell (`shell=False`), untrusted content via stdin only, capability stripping (tools disabled / sandboxed), environment scrubbing (no API keys forwarded), per-call timeout, and fail-closed error handling. + +- **LLM calls** ([llm_utils.py](../src/skillspector/llm_utils.py)): **`get_chat_model()`** and **`chat_completion()`** dispatch based on the active provider: + - **HTTP providers**: resolve credentials in two tiers — active provider (`NVIDIA_INFERENCE_KEY` / `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` → endpoint) — against any OpenAI-compatible endpoint. `max_tokens` is auto-bound to `get_max_output_tokens(model)` from `model_info`. + - **CLI providers** (`claude_cli`, `codex_cli`): `get_chat_model()` returns an `AgentCLIChatModel` adapter backed by `provider.complete()`, so the analyzers' `.invoke()` / `.with_structured_output(schema).invoke()` calls work with no API key (structured output is produced by prompting for JSON, then Pydantic-validating). `chat_completion()` routes through `get_chat_model()` as well. `is_llm_available()` calls `provider.is_available()` instead of credential resolution. - **LLM analyzer base** ([llm_analyzer_base.py](../src/skillspector/nodes/llm_analyzer_base.py)): `LLMAnalyzerBase` provides per-file/per-chunk batching, token-budget-aware chunking, and a run loop for all LLM-based analyzers. `LLMMetaAnalyzer` extends it for filter/enrich (meta_analyzer node). Future semantic analyzers extend `LLMAnalyzerBase` for discovery mode. --- diff --git a/src/skillspector/llm_utils.py b/src/skillspector/llm_utils.py index 1e03fc1..9a5fbba 100644 --- a/src/skillspector/llm_utils.py +++ b/src/skillspector/llm_utils.py @@ -13,13 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared LLM utilities (OpenAI-compatible chat models). +"""Shared LLM utilities (OpenAI-compatible chat models + agent CLI transports). Credentials are resolved in this order: - 1. The active NVIDIA provider (see :mod:`skillspector.providers`) — - reads ``NVIDIA_INFERENCE_KEY`` and supplies the matching endpoint. + 1. The active provider (see :mod:`skillspector.providers`): + - CLI providers (``claude_cli``, ``codex_cli``): use ``is_available()`` + and ``complete()`` — no API key needed. + - HTTP providers (``anthropic``, ``openai``, ``nv_build``): read their + respective credential env vars and supply a base URL. 2. ``OPENAI_API_KEY`` / ``OPENAI_BASE_URL`` (the langchain-openai - defaults). + defaults) — only consulted for HTTP providers when the provider's + own credential env var is unset. There is no SkillSpector-specific credential env var: setting ``NVIDIA_INFERENCE_KEY`` configures whichever NVIDIA endpoint the @@ -29,23 +33,31 @@ from __future__ import annotations +import asyncio +import json import os from langchain_openai import ChatOpenAI from skillspector.constants import MODEL_CONFIG from skillspector.model_info import get_max_input_tokens, get_max_output_tokens -from skillspector.providers import resolve_provider_credentials +from skillspector.providers import ( + get_active_provider, + has_cli_capability, + resolve_provider_credentials, +) def _resolve_llm_credentials() -> tuple[str, str | None]: """Return ``(api_key, base_url)`` resolved from the environment. - Tries the active NVIDIA provider first; falls back to ``OPENAI_API_KEY`` + Tries the active provider first; falls back to ``OPENAI_API_KEY`` / ``OPENAI_BASE_URL`` when the provider is not configured. Raises: ValueError: when no API key can be resolved from any source. + RuntimeError: when called for a CLI provider (use ``is_llm_available`` + / ``chat_completion`` directly instead). """ creds = resolve_provider_credentials() if creds is not None: @@ -65,7 +77,15 @@ def _resolve_llm_credentials() -> tuple[str, str | None]: def is_llm_available() -> tuple[bool, str | None]: - """Return ``(available, error_message)`` describing LLM credential status.""" + """Return ``(available, error_message)`` describing LLM availability. + + For CLI providers (``claude_cli``, ``codex_cli``) the check delegates + to the provider's ``is_available()`` method (binary on PATH + auth). + For HTTP providers, it falls back to credential resolution. + """ + provider = get_active_provider() + if has_cli_capability(provider): + return provider.is_available() # type: ignore[attr-defined] try: _resolve_llm_credentials() except ValueError as exc: @@ -78,26 +98,153 @@ def fetch_model_token_limits(model_label: str) -> tuple[int, int]: return get_max_input_tokens(model_label), get_max_output_tokens(model_label) -def get_chat_model(model: str | None = None) -> ChatOpenAI: - """Return a :class:`ChatOpenAI` configured against the resolved endpoint. +# --------------------------------------------------------------------------- +# Agent CLI chat-model adapter +# --------------------------------------------------------------------------- +# +# The LLM analyzers (meta_analyzer, semantic_*) obtain a model from +# ``get_chat_model()`` and call ``.invoke()`` / ``.with_structured_output( +# schema).invoke()`` on it (see ``llm_analyzer_base``) — they never go through +# ``chat_completion``. To support CLI providers there, ``get_chat_model`` +# returns this minimal adapter, which mimics the slice of the ``ChatOpenAI`` +# interface the analyzers rely on, backed by the provider's ``complete()`` +# subprocess transport. + + +class _AgentCLIMessage: + """Minimal stand-in for a LangChain message: exposes ``.content``.""" + + def __init__(self, content: str) -> None: + self.content = content + + +def _extract_json_object(raw: str) -> dict: + """Extract a single JSON object from a CLI model's text response. + + Tolerates markdown code fences and surrounding prose. Raises ``ValueError`` + (fail-closed) when no JSON object can be parsed. + """ + text = raw.strip() + if text.startswith("```"): + # Drop the opening fence line (``` or ```json) and any closing fence. + text = text.split("\n", 1)[1] if "\n" in text else "" + fence = text.rfind("```") + if fence != -1: + text = text[:fence] + text = text.strip() + try: + obj = json.loads(text) + if isinstance(obj, dict): + return obj + except json.JSONDecodeError: + pass + start, end = text.find("{"), text.rfind("}") + if start != -1 and end > start: + try: + obj = json.loads(text[start : end + 1]) + if isinstance(obj, dict): + return obj + except json.JSONDecodeError: + pass + raise ValueError(f"could not extract a JSON object from CLI response: {raw[:200]!r}") + + +class _StructuredAgentCLIModel: + """Mimics ``ChatOpenAI.with_structured_output(schema)`` for a CLI provider. + + ``invoke`` augments the prompt with the schema, calls the provider's + ``complete()``, then parses and validates the response into *schema*. + """ + + def __init__(self, provider: object, model: str, max_output_tokens: int, schema: type) -> None: + self._provider = provider + self._model = model + self._max_output_tokens = max_output_tokens + self._schema = schema + + def _augment(self, prompt: str) -> str: + schema_json = json.dumps(self._schema.model_json_schema(), indent=2) + return ( + f"{prompt}\n\n" + "Respond with ONLY a single JSON object conforming to the JSON Schema " + "below. Do not wrap it in markdown code fences and do not add any prose " + f"before or after the JSON.\n\nJSON Schema:\n{schema_json}" + ) + + def invoke(self, prompt: str) -> object: + raw = self._provider.complete( # type: ignore[attr-defined] + self._augment(prompt), + model=self._model, + max_output_tokens=self._max_output_tokens, + ) + return self._schema.model_validate(_extract_json_object(raw)) + + async def ainvoke(self, prompt: str) -> object: + return await asyncio.to_thread(self.invoke, prompt) + + +class AgentCLIChatModel: + """Minimal ``ChatOpenAI``-compatible adapter backed by a CLI provider. + + Implements only the surface the analyzers use: ``invoke`` (returns an + object with ``.content``), ``ainvoke``, and ``with_structured_output``. + """ + + def __init__(self, provider: object, model: str, max_output_tokens: int) -> None: + self._provider = provider + self._model = model + self._max_output_tokens = max_output_tokens + + def invoke(self, prompt: str) -> _AgentCLIMessage: + text = self._provider.complete( # type: ignore[attr-defined] + prompt, + model=self._model, + max_output_tokens=self._max_output_tokens, + ) + return _AgentCLIMessage(text) + + async def ainvoke(self, prompt: str) -> _AgentCLIMessage: + return await asyncio.to_thread(self.invoke, prompt) + + def with_structured_output(self, schema: type) -> _StructuredAgentCLIModel: + return _StructuredAgentCLIModel( + self._provider, self._model, self._max_output_tokens, schema + ) + + +def get_chat_model(model: str | None = None) -> ChatOpenAI | AgentCLIChatModel: + """Return a chat model for the active provider. + + For CLI providers (``claude_cli``, ``codex_cli``) this returns an + :class:`AgentCLIChatModel` adapter backed by the provider's ``complete()`` + subprocess transport — so the LLM analyzers (which use ``.invoke()`` and + ``.with_structured_output()``) work with no API key. For HTTP providers it + returns a :class:`ChatOpenAI` configured against the resolved endpoint. Raises: - ValueError: when no API key is configured (see ``is_llm_available``). + ValueError: when an HTTP provider has no API key configured. """ - resolved_key, resolved_base = _resolve_llm_credentials() - model = model or MODEL_CONFIG["default"] + resolved_model = model or MODEL_CONFIG["default"] + provider = get_active_provider() + if has_cli_capability(provider): + return AgentCLIChatModel(provider, resolved_model, get_max_output_tokens(resolved_model)) + + resolved_key, resolved_base = _resolve_llm_credentials() return ChatOpenAI( - model=model, + model=resolved_model, base_url=resolved_base, api_key=resolved_key, - max_tokens=get_max_output_tokens(model), + max_tokens=get_max_output_tokens(resolved_model), timeout=120, ) def chat_completion(prompt: str, *, model: str | None = None) -> str: - """Request a single chat completion and return the assistant content.""" - llm = get_chat_model(model=model) - response = llm.invoke(prompt) + """Request a single chat completion and return the assistant content. + + Routes through :func:`get_chat_model`, which dispatches to the CLI adapter + for CLI providers and to ``ChatOpenAI`` for HTTP providers. + """ + response = get_chat_model(model=model).invoke(prompt) return response.content or "" diff --git a/src/skillspector/providers/__init__.py b/src/skillspector/providers/__init__.py index 78bdd17..47597ce 100644 --- a/src/skillspector/providers/__init__.py +++ b/src/skillspector/providers/__init__.py @@ -25,15 +25,23 @@ openai → OpenAIProvider (api.openai.com) anthropic → AnthropicProvider (api.anthropic.com) nv_build → NvBuildProvider (build.nvidia.com) + claude_cli → ClaudeCLIProvider (local ``claude`` binary, no API key) + codex_cli → CodexCLIProvider (local ``codex`` binary, no API key) When unset, the selector defaults to ``nv_build``. + +CLI providers (``claude_cli``, ``codex_cli``) implement the optional +:class:`~skillspector.providers.base.AgentCLICapable` interface — they +expose ``is_available()`` and ``complete()`` so that +:func:`skillspector.llm_utils.chat_completion` uses the local CLI +subprocess instead of the ``ChatOpenAI`` HTTP transport. """ from __future__ import annotations import os -from .base import CredentialsProvider, ModelMetadataProvider +from .base import AgentCLICapable, CredentialsProvider, ModelMetadataProvider, has_cli_capability from .nv_build import NvBuildProvider @@ -51,6 +59,14 @@ def _select_active_provider() -> ModelMetadataProvider: return AnthropicProvider() if name == "nv_build": return NvBuildProvider() + if name == "claude_cli": + from .claude_cli import ClaudeCLIProvider + + return ClaudeCLIProvider() + if name == "codex_cli": + from .codex_cli import CodexCLIProvider + + return CodexCLIProvider() if name in ("nv_inference", ""): # Try the optional nv_inference subpackage if it's bundled with # this installation; otherwise fall through to nv_build. @@ -63,7 +79,7 @@ def _select_active_provider() -> ModelMetadataProvider: raise ValueError( f"Unknown SKILLSPECTOR_PROVIDER: {name!r}. " - "Expected one of: openai, anthropic, nv_build (or unset)." + "Expected one of: openai, anthropic, nv_build, claude_cli, codex_cli (or unset)." ) @@ -72,18 +88,32 @@ def get_metadata_provider() -> ModelMetadataProvider: return _select_active_provider() +def get_active_provider() -> ModelMetadataProvider: + """Return the active provider (alias for :func:`get_metadata_provider`). + + Preferred over :func:`get_metadata_provider` when callers also need to + check for optional capabilities (e.g. :func:`has_cli_capability`). + """ + return _select_active_provider() + + def resolve_provider_credentials() -> tuple[str, str | None] | None: """Return ``(api_key, base_url)`` from the active provider. Returns ``None`` when the provider's credential env var is unset, so - callers can fall through to other credential sources. + callers can fall through to other credential sources. CLI providers + always return ``None`` from this method; availability is checked via + ``is_available()`` instead. """ return _select_active_provider().resolve_credentials() __all__ = [ + "AgentCLICapable", "CredentialsProvider", "ModelMetadataProvider", + "get_active_provider", "get_metadata_provider", + "has_cli_capability", "resolve_provider_credentials", ] diff --git a/src/skillspector/providers/_agent_cli.py b/src/skillspector/providers/_agent_cli.py new file mode 100644 index 0000000..6274ada --- /dev/null +++ b/src/skillspector/providers/_agent_cli.py @@ -0,0 +1,567 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Hardened subprocess helper for agent CLI providers (claude, codex). + +This is the single security chokepoint for all agent-CLI calls. Every +call goes through :func:`run_agent_cli` which enforces: + +- **No shell**: ``shell=False`` with an explicit argv list. +- **Untrusted content via stdin only**: the prompt (which may contain + adversarial skill content) is written to the process stdin, never + injected into argv. +- **Capability stripping** (per-binary): tools disabled, MCP disabled, + no extra directories, deny permission mode (claude); read-only sandbox + (codex). ``--dangerously-skip-permissions`` is NEVER used. +- **Environment scrubbing**: API keys, SSH keys, cloud credentials, and + other secrets are stripped from the child environment. +- **Timeout enforcement**: the call raises ``TimeoutError`` rather than + hanging indefinitely. +- **Input / output caps**: prompt exceeding ``MAX_INPUT_BYTES`` is + rejected; stdout is capped at ``MAX_OUTPUT_BYTES``. +- **Fail-closed**: non-zero exit, timeout, missing binary, or bad + output all raise ``AgentCLIError``. +- **Prompt-layer hardening**: the caller wraps untrusted content in + clear DATA delimiters before passing it here (defense-in-depth on top + of capability removal). + +The JSON output envelope (``claude -p --output-format json``) is parsed +and the assistant text is returned. ``codex exec --json`` produces +JSONL events; the last assistant message is extracted. +""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import tempfile +import threading +from typing import Any + +from skillspector.logging_config import get_logger + +logger = get_logger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +# Reuse the same cap as static_runner so a skill that's too big for static +# analysis is also too big to send to the CLI. +MAX_INPUT_BYTES = 1_000_000 # 1 MB — mirrors MAX_FILE_BYTES in static_runner.py +MAX_OUTPUT_BYTES = 10_000_000 # 10 MB safety cap on stdout +MAX_STDERR_BYTES = 64_000 # stderr is only used for error snippets +CLI_TIMEOUT_SECONDS = 300 # 5-minute per-call hard limit + +# Environment variables that must NOT be forwarded to child processes. +# Includes API keys, cloud creds, SSH agent, and SkillSpector's own keys. +_SECRET_ENV_PREFIXES: tuple[str, ...] = ( + "ANTHROPIC_API_KEY", + "OPENAI_API_KEY", + "NVIDIA_INFERENCE_KEY", + "NVIDIA_INFERENCE_METADATA_KEY", + "AWS_", + "AZURE_", + "GOOGLE_", + "GCLOUD_", + "GCP_", + "SSH_", + "GPG_", + "GITHUB_TOKEN", + "GITLAB_TOKEN", + "HUGGINGFACE_TOKEN", + "HF_TOKEN", + "COHERE_API_KEY", + "REPLICATE_API_TOKEN", + "MISTRAL_API_KEY", + "TOGETHER_API_KEY", + "GROQ_API_KEY", + "FIREWORKS_API_KEY", + "LANGCHAIN_API_KEY", + "LANGSMITH_API_KEY", +) + + +class AgentCLIError(RuntimeError): + """Raised when an agent CLI call fails for any reason (fail-closed).""" + + +# --------------------------------------------------------------------------- +# Environment scrubbing +# --------------------------------------------------------------------------- + + +def _scrub_env() -> dict[str, str]: + """Return a copy of ``os.environ`` with secret variables removed. + + Any variable whose name starts with a prefix in ``_SECRET_ENV_PREFIXES`` + is stripped. The resulting environment is passed to the subprocess. + """ + clean: dict[str, str] = {} + for key, val in os.environ.items(): + upper = key.upper() + if any(upper.startswith(p.upper()) for p in _SECRET_ENV_PREFIXES): + continue + clean[key] = val + return clean + + +# --------------------------------------------------------------------------- +# Binary lookup +# --------------------------------------------------------------------------- + + +def find_binary(name: str) -> str | None: + """Return the absolute path of *name* on PATH, or ``None`` if absent.""" + return shutil.which(name) + + +# --------------------------------------------------------------------------- +# Argument validation +# --------------------------------------------------------------------------- + + +def _validate_model_label(model: str) -> str: + """Ensure *model* cannot be used as an argument injection vector. + + Model labels come from ``SKILLSPECTOR_MODEL`` (user-controlled) or the + provider's defaults. We verify the label does not start with ``-`` + (which would look like a flag to the CLI) and contains only safe + characters. + + Raises: + AgentCLIError: when the label fails validation. + """ + if not model: + raise AgentCLIError("model label must be a non-empty string") + if model.startswith("-"): + raise AgentCLIError( + f"model label {model!r} starts with '-'; this looks like an argument injection attempt" + ) + # Allow alphanumeric, dash, dot, slash, colon, underscore (covers all + # known claude/codex model identifiers). + allowed = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-./: _") + bad = [c for c in model if c not in allowed] + if bad: + raise AgentCLIError(f"model label {model!r} contains disallowed characters: {bad!r}") + return model + + +# --------------------------------------------------------------------------- +# Claude CLI invocation +# --------------------------------------------------------------------------- + + +def _build_claude_argv(binary: str, model: str, max_output_tokens: int) -> list[str]: + """Build the argv list for a capability-stripped ``claude -p`` call. + + Flags chosen (verified end-to-end against ``claude`` v2.1.177 — each was + confirmed to parse AND to authenticate and return a result): + + ``-p`` / ``--print`` + Non-interactive single-shot mode. The prompt is read from stdin; + the response is written to stdout and the process exits. + + ``--output-format json`` + Emit a single JSON object (not a stream) so we can parse it + deterministically. + + ``--model