From 865bf3087033b27ae374edba34ca498b3232f1ce Mon Sep 17 00:00:00 2001 From: mondaylord Date: Tue, 31 Mar 2026 21:41:21 +0800 Subject: [PATCH 01/22] feat: add dedicated chutes sidecar endpoints --- src/app/api/v1/openai.py | 167 ++++++++++++++++++++++++++++++++------- 1 file changed, 137 insertions(+), 30 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 5a48f1e..19c2994 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -48,6 +48,13 @@ VLLM_COMPLETIONS_URL = f"{VLLM_BASE_URL}/v1/completions" VLLM_METRICS_URL = f"{VLLM_BASE_URL}/metrics" VLLM_MODELS_URL = f"{VLLM_BASE_URL}/v1/models" + +CHUTES_ENABLED = os.getenv("CHUTES_ENABLED", "false").lower() in ("1", "true", "yes", "on") +CHUTES_BASE_URL = os.getenv("CHUTES_BASE_URL", "https://llm.chutes.ai").rstrip("/") +CHUTES_CHAT_COMPLETIONS_URL = f"{CHUTES_BASE_URL}/v1/chat/completions" +CHUTES_MODELS_URL = f"{CHUTES_BASE_URL}/v1/models" +CHUTES_API_KEY = os.getenv("CHUTES_API_KEY") + TIMEOUT = 60 * 10 COMMON_HEADERS = {"Content-Type": "application/json", "Accept": "application/json"} @@ -72,15 +79,29 @@ def sign_chat(text: str): ) +def _with_outbound_headers(outbound_headers: Optional[dict[str, str]] = None) -> dict[str, str]: + headers = dict(COMMON_HEADERS) + if outbound_headers: + headers.update(outbound_headers) + return headers + + +def _chutes_auth_headers() -> dict[str, str]: + if CHUTES_API_KEY: + return {"Authorization": f"Bearer {CHUTES_API_KEY}"} + return {} + + async def stream_vllm_response( url: str, request_body: bytes, modified_request_body: bytes, request_hash: Optional[str] = None, e2ee_ctx=None, + outbound_headers: Optional[dict[str, str]] = None, ): """ - Handle streaming vllm request + Handle streaming backend request. Args: request_body: The original request body modified_request_body: The modified enhanced request body @@ -137,8 +158,10 @@ async def generate_stream(response): log.error(error_message) raise Exception(error_message) - client = httpx.AsyncClient(timeout=httpx.Timeout(TIMEOUT), headers=COMMON_HEADERS) - # Forward the request to the vllm backend + client = httpx.AsyncClient( + timeout=httpx.Timeout(TIMEOUT), + headers=_with_outbound_headers(outbound_headers), + ) req = client.build_request("POST", url, content=modified_request_body) response = await client.send(req, stream=True) # If not 200, return the error response directly without streaming @@ -171,6 +194,7 @@ async def non_stream_vllm_response( modified_request_body: bytes, request_hash: Optional[str] = None, e2ee_ctx=None, + outbound_headers: Optional[dict[str, str]] = None, ): """ Handle non-streaming responses @@ -191,7 +215,8 @@ async def non_stream_vllm_response( log.debug(f"Calculated request hash: {request_sha256}") async with httpx.AsyncClient( - timeout=httpx.Timeout(TIMEOUT), headers=COMMON_HEADERS + timeout=httpx.Timeout(TIMEOUT), + headers=_with_outbound_headers(outbound_headers), ) as client: response = await client.post(url, content=modified_request_body) if response.status_code != 200: @@ -263,17 +288,17 @@ async def attestation_report( return resp -# VLLM Chat completions -@router.post("/chat/completions", dependencies=[Depends(verify_authorization_header)]) -async def chat_completions( +async def _chat_completions_impl( request: Request, - x_request_hash: Optional[str] = Header(None, alias="X-Request-Hash"), - x_signing_algo: Optional[str] = Header(None, alias="X-Signing-Algo"), - x_client_pub_key: Optional[str] = Header(None, alias="X-Client-Pub-Key"), - x_model_pub_key: Optional[str] = Header(None, alias="X-Model-Pub-Key"), - x_e2ee_version: Optional[str] = Header(None, alias="X-E2EE-Version"), - x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"), - x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"), + x_request_hash: Optional[str], + x_signing_algo: Optional[str], + x_client_pub_key: Optional[str], + x_model_pub_key: Optional[str], + x_e2ee_version: Optional[str], + x_e2ee_nonce: Optional[str], + x_e2ee_timestamp: Optional[str], + backend_url: str, + outbound_headers: Optional[dict[str, str]] = None, ): # Keep original request body to calculate the request hash for attestation request_body = await request.body() @@ -299,25 +324,89 @@ async def chat_completions( modified_json = strip_empty_tool_calls(request_json) # Check if the request is for streaming or non-streaming - is_stream = modified_json.get( - "stream", False - ) # Default to non-streaming if not specified + is_stream = modified_json.get("stream", False) modified_request_body = json.dumps(modified_json).encode("utf-8") + if is_stream: - # Create a streaming response return await stream_vllm_response( - VLLM_URL, request_body, modified_request_body, x_request_hash, e2ee_ctx - ) - else: - # Handle non-streaming response - response_data = await non_stream_vllm_response( - VLLM_URL, request_body, modified_request_body, x_request_hash, e2ee_ctx - ) - return JSONResponse( - content=response_data, - headers=get_e2ee_response_headers(e2ee_ctx), + backend_url, + request_body, + modified_request_body, + x_request_hash, + e2ee_ctx, + outbound_headers=outbound_headers, ) + response_data = await non_stream_vllm_response( + backend_url, + request_body, + modified_request_body, + x_request_hash, + e2ee_ctx, + outbound_headers=outbound_headers, + ) + return JSONResponse( + content=response_data, + headers=get_e2ee_response_headers(e2ee_ctx), + ) + + +# VLLM Chat completions (existing path, unchanged behavior) +@router.post("/chat/completions", dependencies=[Depends(verify_authorization_header)]) +async def chat_completions( + request: Request, + x_request_hash: Optional[str] = Header(None, alias="X-Request-Hash"), + x_signing_algo: Optional[str] = Header(None, alias="X-Signing-Algo"), + x_client_pub_key: Optional[str] = Header(None, alias="X-Client-Pub-Key"), + x_model_pub_key: Optional[str] = Header(None, alias="X-Model-Pub-Key"), + x_e2ee_version: Optional[str] = Header(None, alias="X-E2EE-Version"), + x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"), + x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"), +): + return await _chat_completions_impl( + request=request, + x_request_hash=x_request_hash, + x_signing_algo=x_signing_algo, + x_client_pub_key=x_client_pub_key, + x_model_pub_key=x_model_pub_key, + x_e2ee_version=x_e2ee_version, + x_e2ee_nonce=x_e2ee_nonce, + x_e2ee_timestamp=x_e2ee_timestamp, + backend_url=VLLM_URL, + ) + + +# Chutes chat completions (new path, side-by-side with existing logic) +@router.post("/chutes/chat/completions", dependencies=[Depends(verify_authorization_header)]) +async def chutes_chat_completions( + request: Request, + x_request_hash: Optional[str] = Header(None, alias="X-Request-Hash"), + x_signing_algo: Optional[str] = Header(None, alias="X-Signing-Algo"), + x_client_pub_key: Optional[str] = Header(None, alias="X-Client-Pub-Key"), + x_model_pub_key: Optional[str] = Header(None, alias="X-Model-Pub-Key"), + x_e2ee_version: Optional[str] = Header(None, alias="X-E2EE-Version"), + x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"), + x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"), +): + if not CHUTES_ENABLED: + return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled") + + if not CHUTES_API_KEY: + return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured") + + return await _chat_completions_impl( + request=request, + x_request_hash=x_request_hash, + x_signing_algo=x_signing_algo, + x_client_pub_key=x_client_pub_key, + x_model_pub_key=x_model_pub_key, + x_e2ee_version=x_e2ee_version, + x_e2ee_nonce=x_e2ee_nonce, + x_e2ee_timestamp=x_e2ee_timestamp, + backend_url=CHUTES_CHAT_COMPLETIONS_URL, + outbound_headers=_chutes_auth_headers(), + ) + # VLLM completions @router.post("/completions", dependencies=[Depends(verify_authorization_header)]) @@ -410,7 +499,7 @@ async def signature(request: Request, chat_id: str, signing_algo: str = None): async def metrics(request: Request): # Get local metrics from the proxy local_metrics = get_proxy_metrics() - + # Fetch metrics from the vLLM backend try: async with httpx.AsyncClient(timeout=httpx.Timeout(TIMEOUT)) as client: @@ -423,7 +512,7 @@ async def metrics(request: Request): except Exception as e: log.error(f"Error fetching vLLM metrics: {e}") remote_metrics = f"# Error fetching vLLM metrics: {e}" - + # Combine both and return combined_metrics = f"{local_metrics}\n\n# --- vLLM Backend Metrics ---\n\n{remote_metrics}" return PlainTextResponse(combined_metrics) @@ -436,3 +525,21 @@ async def models(request: Request): if response.status_code != 200: raise HTTPException(status_code=response.status_code, detail=response.text) return JSONResponse(content=response.json()) + + +@router.get("/chutes/models", dependencies=[Depends(verify_authorization_header)]) +async def chutes_models(request: Request): + if not CHUTES_ENABLED: + return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled") + + if not CHUTES_API_KEY: + return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured") + + async with httpx.AsyncClient( + timeout=httpx.Timeout(TIMEOUT), + headers=_with_outbound_headers(_chutes_auth_headers()), + ) as client: + response = await client.get(CHUTES_MODELS_URL) + if response.status_code != 200: + raise HTTPException(status_code=response.status_code, detail=response.text) + return JSONResponse(content=response.json()) From 36f952049ecd9154f2c48328b4177e655e267f8a Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 14:06:55 +0800 Subject: [PATCH 02/22] feat: support request model cache namespace for chutes mode --- src/app/api/v1/openai.py | 30 ++++++++++++++++----- src/app/cache/cache.py | 58 +++++++++++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 19c2994..463ee13 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -99,6 +99,7 @@ async def stream_vllm_response( request_hash: Optional[str] = None, e2ee_ctx=None, outbound_headers: Optional[dict[str, str]] = None, + model_name: Optional[str] = None, ): """ Handle streaming backend request. @@ -151,7 +152,9 @@ async def generate_stream(response): # Cache the full request and response using the extracted cache key if chat_id: cache.set_chat( - chat_id, json.dumps(sign_chat(f"{request_sha256}:{response_sha256}")) + chat_id, + json.dumps(sign_chat(f"{request_sha256}:{response_sha256}")), + model_name=model_name, ) else: error_message = "Chat id could not be extracted from the response" @@ -195,6 +198,7 @@ async def non_stream_vllm_response( request_hash: Optional[str] = None, e2ee_ctx=None, outbound_headers: Optional[dict[str, str]] = None, + model_name: Optional[str] = None, ): """ Handle non-streaming responses @@ -230,7 +234,9 @@ async def non_stream_vllm_response( if chat_id: response_sha256 = sha256(json.dumps(response_data).encode("utf-8")).hexdigest() cache.set_chat( - chat_id, json.dumps(sign_chat(f"{request_sha256}:{response_sha256}")) + chat_id, + json.dumps(sign_chat(f"{request_sha256}:{response_sha256}")), + model_name=model_name, ) else: raise Exception("Chat id could not be extracted from the response") @@ -325,6 +331,7 @@ async def _chat_completions_impl( # Check if the request is for streaming or non-streaming is_stream = modified_json.get("stream", False) + request_model = modified_json.get("model") modified_request_body = json.dumps(modified_json).encode("utf-8") if is_stream: @@ -335,6 +342,7 @@ async def _chat_completions_impl( x_request_hash, e2ee_ctx, outbound_headers=outbound_headers, + model_name=request_model, ) response_data = await non_stream_vllm_response( @@ -344,6 +352,7 @@ async def _chat_completions_impl( x_request_hash, e2ee_ctx, outbound_headers=outbound_headers, + model_name=request_model, ) return JSONResponse( content=response_data, @@ -445,24 +454,33 @@ async def completions( is_stream = modified_json.get( "stream", False ) # Default to non-streaming if not specified + request_model = modified_json.get("model") modified_request_body = json.dumps(modified_json).encode("utf-8") if is_stream: # Create a streaming response return await stream_vllm_response( - VLLM_COMPLETIONS_URL, request_body, modified_request_body, x_request_hash + VLLM_COMPLETIONS_URL, + request_body, + modified_request_body, + x_request_hash, + model_name=request_model, ) else: # Handle non-streaming response response_data = await non_stream_vllm_response( - VLLM_COMPLETIONS_URL, request_body, modified_request_body, x_request_hash + VLLM_COMPLETIONS_URL, + request_body, + modified_request_body, + x_request_hash, + model_name=request_model, ) return JSONResponse(content=response_data) # Get signature for chat_id of chat history @router.get("/signature/{chat_id}", dependencies=[Depends(verify_authorization_header)]) -async def signature(request: Request, chat_id: str, signing_algo: str = None): - cache_value = cache.get_chat(chat_id) +async def signature(request: Request, chat_id: str, signing_algo: str = None, model: Optional[str] = None): + cache_value = cache.get_chat(chat_id, model_name=model) if cache_value is None: return not_found("Chat id not found or expired") diff --git a/src/app/cache/cache.py b/src/app/cache/cache.py index d629c95..5e0725d 100644 --- a/src/app/cache/cache.py +++ b/src/app/cache/cache.py @@ -1,4 +1,3 @@ -import json import os from typing import Optional @@ -9,7 +8,11 @@ CHAT_CACHE_EXPIRATION = int(os.getenv("CHAT_CACHE_EXPIRATION", "1200")) MODEL_NAME = os.getenv("MODEL_NAME") -if not MODEL_NAME: +CHUTES_ENABLED = os.getenv("CHUTES_ENABLED", "false").lower() in ("1", "true", "yes", "on") +# Stable namespace used for backward-compatible lookup when request model is dynamic. +CHUTES_FALLBACK_MODEL_NAME = os.getenv("CHUTES_FALLBACK_MODEL_NAME", "chutes") + +if not CHUTES_ENABLED and not MODEL_NAME: raise ValueError("MODEL_NAME is not set") CHAT_PREFIX = "chat" @@ -35,9 +38,23 @@ def _init_redis(self) -> Optional[RedisCache]: return None return RedisCache(expiration=CHAT_CACHE_EXPIRATION) - def _make_key(self, prefix: str, key: str) -> str: + def _resolve_model_name(self, request_model: Optional[str] = None) -> str: + """Resolve model namespace for cache key.""" + if request_model: + return request_model + + if MODEL_NAME: + return MODEL_NAME + + if CHUTES_ENABLED: + return CHUTES_FALLBACK_MODEL_NAME + + raise ValueError("MODEL_NAME is not set") + + def _make_key(self, prefix: str, key: str, model_name: Optional[str] = None) -> str: """Build namespaced cache key: model:prefix:key""" - return f"{MODEL_NAME}:{prefix}:{key}" + resolved_model_name = self._resolve_model_name(model_name) + return f"{resolved_model_name}:{prefix}:{key}" def _write_string(self, key: str, value: str) -> None: """Write string to local and optionally to Redis.""" @@ -63,16 +80,37 @@ def _read_string(self, key: str) -> Optional[str]: # Chat operations - def set_chat(self, chat_id: str, chat: str) -> None: + def set_chat(self, chat_id: str, chat: str, model_name: Optional[str] = None) -> None: """Store chat completion data.""" - key = self._make_key(CHAT_PREFIX, chat_id) + key = self._make_key(CHAT_PREFIX, chat_id, model_name=model_name) self._write_string(key, chat) - def get_chat(self, chat_id: str) -> Optional[str]: + # Backward-compatible fallback key for dynamic-model proxy mode. + if CHUTES_ENABLED and model_name: + fallback_key = self._make_key( + CHAT_PREFIX, + chat_id, + model_name=CHUTES_FALLBACK_MODEL_NAME, + ) + if fallback_key != key: + self._write_string(fallback_key, chat) + + def get_chat(self, chat_id: str, model_name: Optional[str] = None) -> Optional[str]: """Retrieve chat completion data.""" - key = self._make_key(CHAT_PREFIX, chat_id) - return self._read_string(key) - + key = self._make_key(CHAT_PREFIX, chat_id, model_name=model_name) + value = self._read_string(key) + if value: + return value + + if CHUTES_ENABLED and model_name and model_name != CHUTES_FALLBACK_MODEL_NAME: + fallback_key = self._make_key( + CHAT_PREFIX, + chat_id, + model_name=CHUTES_FALLBACK_MODEL_NAME, + ) + return self._read_string(fallback_key) + + return None cache = ChatCache() From 7ab07f96f335b5848d44f9a8418c0125213d1e18 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 15:52:18 +0800 Subject: [PATCH 03/22] feat: route /v1/chat/completions to chutes when enabled --- src/app/api/v1/openai.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 463ee13..aca1e30 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -360,7 +360,9 @@ async def _chat_completions_impl( ) -# VLLM Chat completions (existing path, unchanged behavior) +# Chat completions (compat route): +# - CHUTES_ENABLED=false -> original vLLM backend behavior +# - CHUTES_ENABLED=true -> transparently route to Chutes backend @router.post("/chat/completions", dependencies=[Depends(verify_authorization_header)]) async def chat_completions( request: Request, @@ -372,6 +374,15 @@ async def chat_completions( x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"), x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"), ): + backend_url = VLLM_URL + outbound_headers = None + + if CHUTES_ENABLED: + if not CHUTES_API_KEY: + return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured") + backend_url = CHUTES_CHAT_COMPLETIONS_URL + outbound_headers = _chutes_auth_headers() + return await _chat_completions_impl( request=request, x_request_hash=x_request_hash, @@ -381,7 +392,8 @@ async def chat_completions( x_e2ee_version=x_e2ee_version, x_e2ee_nonce=x_e2ee_nonce, x_e2ee_timestamp=x_e2ee_timestamp, - backend_url=VLLM_URL, + backend_url=backend_url, + outbound_headers=outbound_headers, ) From 45d8ff8bb11f6ba835a6fec46f3be1fd6d1c0aaf Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 16:48:59 +0800 Subject: [PATCH 04/22] feat(attestation): add /v1/attestation/chain endpoint with binding proof --- src/app/api/v1/openai.py | 111 +++++++++++++++++++++++++++++++--- tests/app/test_openai_e2ee.py | 40 ++++++++++++ 2 files changed, 142 insertions(+), 9 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index aca1e30..c9b976d 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -1,5 +1,7 @@ import json import os +import time +import hashlib from hashlib import sha256 from typing import Optional @@ -268,6 +270,24 @@ def strip_empty_tool_calls(payload: dict) -> dict: return payload +def _normalize_signing_algo(signing_algo: str | None) -> str: + algo = ECDSA if signing_algo is None else signing_algo.strip().lower() + if algo not in [ECDSA, ED25519]: + raise ValueError("invalid_signing_algo") + return algo + + +def _build_proxy_attestation(signing_algo: str, nonce: str | None) -> dict: + context = ecdsa_context if signing_algo == ECDSA else ed25519_context + attestation = dict(generate_attestation(context, nonce)) + attestation["signing_public_key"] = local_model_public_key_hex(signing_algo) + + resp = dict(attestation) + resp["signing_public_key"] = attestation["signing_public_key"] + resp["all_attestations"] = [attestation] + return resp + + # Get attestation report of intel quote and nvidia payload @router.get("/attestation/report", dependencies=[Depends(verify_authorization_header)]) async def attestation_report( @@ -276,22 +296,95 @@ async def attestation_report( nonce: str | None = Query(None), signing_address: str | None = Query(None), ): - signing_algo = ECDSA if signing_algo is None else signing_algo - if signing_algo not in [ECDSA, ED25519]: + try: + algo = _normalize_signing_algo(signing_algo) + except ValueError: return invalid_signing_algo() - context = ecdsa_context if signing_algo == ECDSA else ed25519_context + try: + return _build_proxy_attestation(algo, nonce) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + + +@router.get("/attestation/chain", dependencies=[Depends(verify_authorization_header)]) +async def attestation_chain( + request: Request, + model: str = Query(...), + nonce: str = Query(...), + signing_algo: str | None = None, +): + if not CHUTES_ENABLED: + return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled") + + if not CHUTES_API_KEY: + return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured") + + try: + algo = _normalize_signing_algo(signing_algo) + except ValueError: + return invalid_signing_algo() + + if len(nonce) < 16: + return error( + status_code=400, + message="nonce must be at least 16 characters", + type="invalid_nonce", + ) try: - attestation = dict(generate_attestation(context, nonce)) + proxy_attestation = _build_proxy_attestation(algo, nonce) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) - attestation["signing_public_key"] = local_model_public_key_hex(signing_algo) - resp = dict(attestation) - resp["signing_public_key"] = attestation["signing_public_key"] - resp["all_attestations"] = [attestation] - return resp + upstream_params = {"model": model, "nonce": nonce, "signing_algo": algo} + async with httpx.AsyncClient( + timeout=httpx.Timeout(TIMEOUT), + headers=_with_outbound_headers(_chutes_auth_headers()), + ) as client: + upstream_response = await client.get( + f"{CHUTES_BASE_URL}/v1/attestation/report", + params=upstream_params, + ) + + if upstream_response.status_code != 200: + raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text) + + upstream_attestation = upstream_response.json() + upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":")) + upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest() + + binding_payload = { + "nonce": nonce, + "timestamp": int(time.time()), + "provider": "chutes", + "upstream_base_url": CHUTES_BASE_URL, + "model": model, + "upstream_attestation_sha256": upstream_attestation_sha256, + } + binding_text = json.dumps(binding_payload, sort_keys=True, separators=(",", ":")) + context = ecdsa_context if algo == ECDSA else ed25519_context + + return { + "version": "1", + "proxy": { + "attestation": proxy_attestation, + "signing_public_key": proxy_attestation.get("signing_public_key"), + }, + "upstream": { + "provider": "chutes", + "base_url": CHUTES_BASE_URL, + "model": model, + "attestation": upstream_attestation, + "attestation_sha256": upstream_attestation_sha256, + }, + "binding_proof": { + "payload": binding_payload, + "signature": sign_message(context, binding_text), + "signing_algo": algo, + "signing_address": context.signing_address, + }, + } async def _chat_completions_impl( diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 2b85760..9842d83 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -332,3 +332,43 @@ def test_attestation_report_includes_signing_public_key(): assert "signing_public_key" in data assert len(data["signing_public_key"]) == 64 assert data["all_attestations"][0]["signing_public_key"] == data["signing_public_key"] + + +@pytest.mark.asyncio +@pytest.mark.respx +async def test_attestation_chain_success(respx_mock): + upstream_url = "https://llm.chutes.ai/v1/attestation/report" + upstream_att = {"report": "upstream-ok", "nonce": "n" * 16} + respx_mock.get(upstream_url).mock(return_value=httpx.Response(200, json=upstream_att)) + + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + response = client.get( + "/v1/attestation/chain", + params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "a" * 16, "signing_algo": "ecdsa"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["version"] == "1" + assert data["proxy"]["attestation"]["request_nonce"] == "a" * 16 + assert data["upstream"]["attestation"] == upstream_att + + expected_hash = __import__("hashlib").sha256( + json.dumps(upstream_att, sort_keys=True, separators=(",", ":")).encode("utf-8") + ).hexdigest() + assert data["upstream"]["attestation_sha256"] == expected_hash + assert data["binding_proof"]["payload"]["upstream_attestation_sha256"] == expected_hash + assert data["binding_proof"]["payload"]["model"] == "moonshotai/Kimi-K2.5-TEE" + + +def test_attestation_chain_nonce_too_short(): + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + response = client.get( + "/v1/attestation/chain", + params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "short", "signing_algo": "ecdsa"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 400 + assert response.json()["error"]["type"] == "invalid_nonce" From d85efa0856bd3ac2f2c71fa2b2b18bef0e48f7e0 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 16:50:23 +0800 Subject: [PATCH 05/22] fix(attestation-chain): validate input and handle upstream fetch/JSON failures --- src/app/api/v1/openai.py | 28 +++++++++++++++++++--------- tests/app/test_openai_e2ee.py | 15 ++++++++++++++- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index c9b976d..77fa58e 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -325,12 +325,16 @@ async def attestation_chain( except ValueError: return invalid_signing_algo() + nonce = nonce.strip() + model = model.strip() if len(nonce) < 16: return error( status_code=400, message="nonce must be at least 16 characters", type="invalid_nonce", ) + if not model: + return error(status_code=400, message="model must not be empty", type="invalid_model") try: proxy_attestation = _build_proxy_attestation(algo, nonce) @@ -338,19 +342,25 @@ async def attestation_chain( raise HTTPException(status_code=400, detail=str(exc)) upstream_params = {"model": model, "nonce": nonce, "signing_algo": algo} - async with httpx.AsyncClient( - timeout=httpx.Timeout(TIMEOUT), - headers=_with_outbound_headers(_chutes_auth_headers()), - ) as client: - upstream_response = await client.get( - f"{CHUTES_BASE_URL}/v1/attestation/report", - params=upstream_params, - ) + try: + async with httpx.AsyncClient( + timeout=httpx.Timeout(TIMEOUT), + headers=_with_outbound_headers(_chutes_auth_headers()), + ) as client: + upstream_response = await client.get( + f"{CHUTES_BASE_URL}/v1/attestation/report", + params=upstream_params, + ) + except httpx.RequestError as exc: + return error(status_code=502, message=f"Failed to fetch upstream attestation: {exc}", type="upstream_unreachable") if upstream_response.status_code != 200: raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text) - upstream_attestation = upstream_response.json() + try: + upstream_attestation = upstream_response.json() + except ValueError: + return error(status_code=502, message="Upstream attestation response is not valid JSON", type="upstream_invalid_response") upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":")) upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest() diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 9842d83..099da75 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -1,3 +1,4 @@ +import hashlib import httpx import pytest from fastapi.testclient import TestClient @@ -354,7 +355,7 @@ async def test_attestation_chain_success(respx_mock): assert data["proxy"]["attestation"]["request_nonce"] == "a" * 16 assert data["upstream"]["attestation"] == upstream_att - expected_hash = __import__("hashlib").sha256( + expected_hash = hashlib.sha256( json.dumps(upstream_att, sort_keys=True, separators=(",", ":")).encode("utf-8") ).hexdigest() assert data["upstream"]["attestation_sha256"] == expected_hash @@ -372,3 +373,15 @@ def test_attestation_chain_nonce_too_short(): assert response.status_code == 400 assert response.json()["error"]["type"] == "invalid_nonce" + + +def test_attestation_chain_model_empty(): + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + response = client.get( + "/v1/attestation/chain", + params={"model": " ", "nonce": "a" * 16, "signing_algo": "ecdsa"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 400 + assert response.json()["error"]["type"] == "invalid_model" From fabf438b3b2b5b49dcd02663a5db7f99c1606001 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 17:07:18 +0800 Subject: [PATCH 06/22] test(attestation): add standalone verification scripts for report and chain endpoints --- tests/app/verify_attestation_chain.py | 74 +++++++++++++++++++ .../app/verify_attestation_chain_negative.py | 37 ++++++++++ tests/app/verify_attestation_report.py | 47 ++++++++++++ 3 files changed, 158 insertions(+) create mode 100644 tests/app/verify_attestation_chain.py create mode 100644 tests/app/verify_attestation_chain_negative.py create mode 100644 tests/app/verify_attestation_report.py diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py new file mode 100644 index 0000000..22995ad --- /dev/null +++ b/tests/app/verify_attestation_chain.py @@ -0,0 +1,74 @@ +import os +import json +import time +import hashlib +import requests + +BASE_URL = os.environ.get("BASE_URL", "").rstrip("/") +API_KEY = os.environ.get("API_KEY", "") +MODEL_NAME = os.environ.get("MODEL_NAME", "") +SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower() + + +def _canonical_json(obj) -> str: + return json.dumps(obj, sort_keys=True, separators=(",", ":")) + + +def main(): + if not BASE_URL or not API_KEY or not MODEL_NAME: + raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME") + + nonce = f"chain-{int(time.time())}-proof" + + url = f"{BASE_URL}/v1/attestation/chain" + resp = requests.get( + url, + params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO}, + headers={"Authorization": f"Bearer {API_KEY}"}, + timeout=90, + ) + + print("status:", resp.status_code) + resp.raise_for_status() + + data = resp.json() + + # Top-level structure checks + assert data.get("version") == "1", "unexpected chain version" + assert "proxy" in data and "upstream" in data and "binding_proof" in data, "missing chain sections" + + # Proxy section + proxy = data["proxy"] + proxy_att = proxy["attestation"] + assert proxy.get("signing_public_key"), "missing proxy.signing_public_key" + assert proxy_att.get("signing_public_key") == proxy["signing_public_key"], "proxy signing_public_key mismatch" + + # Upstream hash consistency + upstream = data["upstream"] + upstream_att = upstream["attestation"] + upstream_hash = hashlib.sha256(_canonical_json(upstream_att).encode("utf-8")).hexdigest() + assert upstream.get("attestation_sha256") == upstream_hash, "upstream attestation hash mismatch" + + # Binding payload consistency + bp = data["binding_proof"] + payload = bp["payload"] + assert payload.get("nonce") == nonce, "binding payload nonce mismatch" + assert payload.get("model") == MODEL_NAME, "binding payload model mismatch" + assert payload.get("upstream_attestation_sha256") == upstream_hash, "binding payload hash mismatch" + assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch" + assert bp.get("signature"), "binding signature missing" + + # Optional sanity: if upstream carries nonce, it should match + upstream_nonce = upstream_att.get("request_nonce") or upstream_att.get("nonce") + if upstream_nonce is not None: + assert upstream_nonce == nonce, "upstream nonce does not match request nonce" + + print("[OK] /v1/attestation/chain validated") + print("nonce:", nonce) + print("model:", MODEL_NAME) + print("signing_algo:", SIGNING_ALGO) + print("upstream_attestation_sha256:", upstream_hash) + + +if __name__ == "__main__": + main() diff --git a/tests/app/verify_attestation_chain_negative.py b/tests/app/verify_attestation_chain_negative.py new file mode 100644 index 0000000..62adc6d --- /dev/null +++ b/tests/app/verify_attestation_chain_negative.py @@ -0,0 +1,37 @@ +import os +import requests + +BASE_URL = os.environ.get("BASE_URL", "").rstrip("/") +API_KEY = os.environ.get("API_KEY", "") +MODEL_NAME = os.environ.get("MODEL_NAME", "") +SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower() + + +def main(): + if not BASE_URL or not API_KEY or not MODEL_NAME: + raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME") + + # nonce deliberately too short, should fail with invalid_nonce + resp = requests.get( + f"{BASE_URL}/v1/attestation/chain", + params={"model": MODEL_NAME, "nonce": "short", "signing_algo": SIGNING_ALGO}, + headers={"Authorization": f"Bearer {API_KEY}"}, + timeout=60, + ) + + print("status:", resp.status_code) + print("body:", resp.text) + + if resp.status_code != 400: + raise RuntimeError(f"Expected 400 for short nonce, got {resp.status_code}") + + data = resp.json() + err_type = (data.get("error") or {}).get("type") + if err_type != "invalid_nonce": + raise RuntimeError(f"Expected error.type=invalid_nonce, got {err_type}") + + print("[OK] negative check passed (invalid_nonce)") + + +if __name__ == "__main__": + main() diff --git a/tests/app/verify_attestation_report.py b/tests/app/verify_attestation_report.py new file mode 100644 index 0000000..6c7a3b3 --- /dev/null +++ b/tests/app/verify_attestation_report.py @@ -0,0 +1,47 @@ +import os +import requests + +BASE_URL = os.environ.get("BASE_URL", "").rstrip("/") +API_KEY = os.environ.get("API_KEY", "") +MODEL_NAME = os.environ.get("MODEL_NAME", "") # kept for consistent runner env +SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower() + + +def main(): + if not BASE_URL or not API_KEY: + raise RuntimeError("Please set BASE_URL and API_KEY") + + url = f"{BASE_URL}/v1/attestation/report" + resp = requests.get( + url, + params={"signing_algo": SIGNING_ALGO}, + headers={"Authorization": f"Bearer {API_KEY}"}, + timeout=60, + ) + + print("status:", resp.status_code) + resp.raise_for_status() + data = resp.json() + + assert "signing_public_key" in data, "missing signing_public_key" + assert "all_attestations" in data and isinstance(data["all_attestations"], list), "missing all_attestations" + assert data["all_attestations"], "all_attestations is empty" + assert ( + data["all_attestations"][0].get("signing_public_key") == data["signing_public_key"] + ), "top-level signing_public_key mismatch" + + expected_len = 128 if SIGNING_ALGO == "ecdsa" else 64 + assert len(data["signing_public_key"]) == expected_len, ( + f"unexpected signing_public_key length: {len(data['signing_public_key'])}, " + f"expected {expected_len} for {SIGNING_ALGO}" + ) + + print("[OK] /v1/attestation/report validated") + print("signing_algo:", SIGNING_ALGO) + print("signing_public_key len:", len(data["signing_public_key"])) + if MODEL_NAME: + print("model_name (unused in this check):", MODEL_NAME) + + +if __name__ == "__main__": + main() From ddd5408ccfda3bd731dd2ae767442f950fb35322 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 17:11:56 +0800 Subject: [PATCH 07/22] fix(attestation-chain): use api.chutes.ai for upstream attestation endpoint --- src/app/api/v1/openai.py | 7 ++++--- tests/app/test_openai_e2ee.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 77fa58e..931a9b2 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -53,6 +53,7 @@ CHUTES_ENABLED = os.getenv("CHUTES_ENABLED", "false").lower() in ("1", "true", "yes", "on") CHUTES_BASE_URL = os.getenv("CHUTES_BASE_URL", "https://llm.chutes.ai").rstrip("/") +CHUTES_ATTESTATION_BASE_URL = os.getenv("CHUTES_ATTESTATION_BASE_URL", "https://api.chutes.ai").rstrip("/") CHUTES_CHAT_COMPLETIONS_URL = f"{CHUTES_BASE_URL}/v1/chat/completions" CHUTES_MODELS_URL = f"{CHUTES_BASE_URL}/v1/models" CHUTES_API_KEY = os.getenv("CHUTES_API_KEY") @@ -348,7 +349,7 @@ async def attestation_chain( headers=_with_outbound_headers(_chutes_auth_headers()), ) as client: upstream_response = await client.get( - f"{CHUTES_BASE_URL}/v1/attestation/report", + f"{CHUTES_ATTESTATION_BASE_URL}/v1/attestation/report", params=upstream_params, ) except httpx.RequestError as exc: @@ -368,7 +369,7 @@ async def attestation_chain( "nonce": nonce, "timestamp": int(time.time()), "provider": "chutes", - "upstream_base_url": CHUTES_BASE_URL, + "upstream_base_url": CHUTES_ATTESTATION_BASE_URL, "model": model, "upstream_attestation_sha256": upstream_attestation_sha256, } @@ -383,7 +384,7 @@ async def attestation_chain( }, "upstream": { "provider": "chutes", - "base_url": CHUTES_BASE_URL, + "base_url": CHUTES_ATTESTATION_BASE_URL, "model": model, "attestation": upstream_attestation, "attestation_sha256": upstream_attestation_sha256, diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 099da75..3b816bc 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -338,7 +338,7 @@ def test_attestation_report_includes_signing_public_key(): @pytest.mark.asyncio @pytest.mark.respx async def test_attestation_chain_success(respx_mock): - upstream_url = "https://llm.chutes.ai/v1/attestation/report" + upstream_url = "https://api.chutes.ai/v1/attestation/report" upstream_att = {"report": "upstream-ok", "nonce": "n" * 16} respx_mock.get(upstream_url).mock(return_value=httpx.Response(200, json=upstream_att)) From 5bdab6f653d06f90d458f303692bf98e6494778b Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 17:30:33 +0800 Subject: [PATCH 08/22] fix(test): use hex nonce in attestation chain verification script --- tests/app/verify_attestation_chain.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index 22995ad..4b9a4d8 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -1,7 +1,7 @@ import os import json -import time import hashlib +import secrets import requests BASE_URL = os.environ.get("BASE_URL", "").rstrip("/") @@ -18,7 +18,8 @@ def main(): if not BASE_URL or not API_KEY or not MODEL_NAME: raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME") - nonce = f"chain-{int(time.time())}-proof" + # Use 32-byte hex nonce to match attestation implementations that expect hex challenge. + nonce = secrets.token_hex(32) url = f"{BASE_URL}/v1/attestation/chain" resp = requests.get( From 7b5b4cba685d5d3f1abbc69ce8a87f2d14322e0d Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 17:34:24 +0800 Subject: [PATCH 09/22] fix(attestation-chain): surface upstream rate-limit errors and improve verifier output --- src/app/api/v1/openai.py | 7 +++++++ tests/app/verify_attestation_chain.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 931a9b2..8ebf151 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -355,6 +355,13 @@ async def attestation_chain( except httpx.RequestError as exc: return error(status_code=502, message=f"Failed to fetch upstream attestation: {exc}", type="upstream_unreachable") + if upstream_response.status_code == 429: + retry_after = upstream_response.headers.get("Retry-After") + msg = "Upstream attestation is rate limited" + if retry_after: + msg = f"{msg}; retry after {retry_after} seconds" + return error(status_code=429, message=msg, type="upstream_rate_limited") + if upstream_response.status_code != 200: raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text) diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index 4b9a4d8..69fd55c 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -30,6 +30,14 @@ def main(): ) print("status:", resp.status_code) + if resp.status_code == 429: + print("body:", resp.text) + raise RuntimeError( + "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). " + "Wait and retry, or reduce verification call frequency." + ) + if resp.status_code >= 400: + print("body:", resp.text) resp.raise_for_status() data = resp.json() From d7dbef82b7a8e02f1fcff956a0d0f75b7168347f Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 17:42:32 +0800 Subject: [PATCH 10/22] fix(attestation-chain): follow chutes evidence flow (model->chute->instances->evidence) --- src/app/api/v1/openai.py | 117 ++++++++++++++++++++++++++++------ tests/app/test_openai_e2ee.py | 42 +++++++++--- 2 files changed, 132 insertions(+), 27 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 8ebf151..bae25c1 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -57,6 +57,8 @@ CHUTES_CHAT_COMPLETIONS_URL = f"{CHUTES_BASE_URL}/v1/chat/completions" CHUTES_MODELS_URL = f"{CHUTES_BASE_URL}/v1/models" CHUTES_API_KEY = os.getenv("CHUTES_API_KEY") +CHUTES_CHUTE_ID_CACHE: dict[str, tuple[str, float]] = {} +CHUTES_CHUTE_ID_CACHE_TTL_SECONDS = int(os.getenv("CHUTES_CHUTE_ID_CACHE_TTL_SECONDS", "3600")) TIMEOUT = 60 * 10 @@ -289,6 +291,98 @@ def _build_proxy_attestation(signing_algo: str, nonce: str | None) -> dict: return resp +def _error_from_upstream_429(response: httpx.Response): + retry_after = response.headers.get("Retry-After") + msg = "Upstream attestation is rate limited" + if retry_after: + msg = f"{msg}; retry after {retry_after} seconds" + return error(status_code=429, message=msg, type="upstream_rate_limited") + + +async def _resolve_chute_id(client: httpx.AsyncClient, model: str) -> str | dict: + now = time.time() + cached = CHUTES_CHUTE_ID_CACHE.get(model) + if cached and now - cached[1] < CHUTES_CHUTE_ID_CACHE_TTL_SECONDS: + return cached[0] + + resp = await client.get( + f"{CHUTES_ATTESTATION_BASE_URL}/chutes/", + params={"include_public": "true", "name": model}, + ) + if resp.status_code == 429: + return _error_from_upstream_429(resp) + if resp.status_code != 200: + raise HTTPException(status_code=resp.status_code, detail=resp.text) + + data = resp.json() + items = data.get("items") or [] + if not items: + return error(status_code=404, message=f"No chute found for model: {model}", type="upstream_model_not_found") + + chute_id = items[0].get("chute_id") + if not chute_id: + return error(status_code=502, message="Upstream chute lookup missing chute_id", type="upstream_invalid_response") + + CHUTES_CHUTE_ID_CACHE[model] = (chute_id, now) + return chute_id + + +async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce: str) -> tuple[dict | None, dict | None]: + chute_id_or_error = await _resolve_chute_id(client, model) + if isinstance(chute_id_or_error, dict) and chute_id_or_error.get("error"): + return None, chute_id_or_error + chute_id = chute_id_or_error + + e2e_resp = await client.get(f"{CHUTES_ATTESTATION_BASE_URL}/e2e/instances/{chute_id}") + if e2e_resp.status_code == 429: + return None, _error_from_upstream_429(e2e_resp) + if e2e_resp.status_code != 200: + raise HTTPException(status_code=e2e_resp.status_code, detail=e2e_resp.text) + + e2e_data = e2e_resp.json() + instances = e2e_data.get("instances") or [] + pubkeys = {i.get("instance_id"): i.get("e2e_pubkey") for i in instances if i.get("instance_id")} + + evidence_resp = await client.get( + f"{CHUTES_ATTESTATION_BASE_URL}/chutes/{chute_id}/evidence", + params={"nonce": nonce}, + ) + if evidence_resp.status_code == 429: + return None, _error_from_upstream_429(evidence_resp) + if evidence_resp.status_code != 200: + raise HTTPException(status_code=evidence_resp.status_code, detail=evidence_resp.text) + + evidence_data = evidence_resp.json() + evidence_list = evidence_data.get("evidence") or [] + + all_attestations = [] + for e in evidence_list: + iid = e.get("instance_id") + e2e_pubkey = pubkeys.get(iid) + if not iid or not e2e_pubkey: + continue + all_attestations.append( + { + "instance_id": iid, + "nonce": nonce, + "e2e_pubkey": e2e_pubkey, + "intel_quote": e.get("quote"), + "gpu_evidence": e.get("gpu_evidence", []), + "certificate": e.get("certificate"), + } + ) + + if not all_attestations: + return None, error(status_code=502, message="No usable upstream attestations returned", type="upstream_invalid_response") + + return { + "attestation_type": "chutes", + "nonce": nonce, + "chute_id": chute_id, + "all_attestations": all_attestations, + }, None + + # Get attestation report of intel quote and nvidia payload @router.get("/attestation/report", dependencies=[Depends(verify_authorization_header)]) async def attestation_report( @@ -342,33 +436,18 @@ async def attestation_chain( except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) - upstream_params = {"model": model, "nonce": nonce, "signing_algo": algo} try: async with httpx.AsyncClient( timeout=httpx.Timeout(TIMEOUT), - headers=_with_outbound_headers(_chutes_auth_headers()), + headers={"Authorization": f"Bearer {CHUTES_API_KEY}"}, ) as client: - upstream_response = await client.get( - f"{CHUTES_ATTESTATION_BASE_URL}/v1/attestation/report", - params=upstream_params, - ) + upstream_attestation, upstream_error = await _fetch_chutes_attestation(client, model, nonce) except httpx.RequestError as exc: return error(status_code=502, message=f"Failed to fetch upstream attestation: {exc}", type="upstream_unreachable") - if upstream_response.status_code == 429: - retry_after = upstream_response.headers.get("Retry-After") - msg = "Upstream attestation is rate limited" - if retry_after: - msg = f"{msg}; retry after {retry_after} seconds" - return error(status_code=429, message=msg, type="upstream_rate_limited") - - if upstream_response.status_code != 200: - raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text) + if upstream_error is not None: + return upstream_error - try: - upstream_attestation = upstream_response.json() - except ValueError: - return error(status_code=502, message="Upstream attestation response is not valid JSON", type="upstream_invalid_response") upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":")) upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest() diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 3b816bc..46ae695 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -338,29 +338,55 @@ def test_attestation_report_includes_signing_public_key(): @pytest.mark.asyncio @pytest.mark.respx async def test_attestation_chain_success(respx_mock): - upstream_url = "https://api.chutes.ai/v1/attestation/report" - upstream_att = {"report": "upstream-ok", "nonce": "n" * 16} - respx_mock.get(upstream_url).mock(return_value=httpx.Response(200, json=upstream_att)) + model = "moonshotai/Kimi-K2.5-TEE" + nonce = "a" * 16 + + respx_mock.get("https://api.chutes.ai/chutes/").mock( + return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-123"}]}) + ) + respx_mock.get("https://api.chutes.ai/e2e/instances/chute-123").mock( + return_value=httpx.Response( + 200, + json={"instances": [{"instance_id": "inst-1", "e2e_pubkey": "pk-1"}]}, + ) + ) + respx_mock.get("https://api.chutes.ai/chutes/chute-123/evidence").mock( + return_value=httpx.Response( + 200, + json={ + "evidence": [ + { + "instance_id": "inst-1", + "quote": "intel-quote", + "gpu_evidence": [{"gpu": "ok"}], + "certificate": "cert", + } + ] + }, + ) + ) with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): response = client.get( "/v1/attestation/chain", - params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "a" * 16, "signing_algo": "ecdsa"}, + params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, headers={"Authorization": TEST_AUTH_HEADER}, ) assert response.status_code == 200 data = response.json() assert data["version"] == "1" - assert data["proxy"]["attestation"]["request_nonce"] == "a" * 16 - assert data["upstream"]["attestation"] == upstream_att + assert data["proxy"]["attestation"]["request_nonce"] == nonce + assert data["upstream"]["attestation"]["attestation_type"] == "chutes" + assert data["upstream"]["attestation"]["chute_id"] == "chute-123" + assert data["upstream"]["attestation"]["all_attestations"][0]["instance_id"] == "inst-1" expected_hash = hashlib.sha256( - json.dumps(upstream_att, sort_keys=True, separators=(",", ":")).encode("utf-8") + json.dumps(data["upstream"]["attestation"], sort_keys=True, separators=(",", ":")).encode("utf-8") ).hexdigest() assert data["upstream"]["attestation_sha256"] == expected_hash assert data["binding_proof"]["payload"]["upstream_attestation_sha256"] == expected_hash - assert data["binding_proof"]["payload"]["model"] == "moonshotai/Kimi-K2.5-TEE" + assert data["binding_proof"]["payload"]["model"] == model def test_attestation_chain_nonce_too_short(): From f46dc076b2a0ef4144cad54aa82dd7ffc94c6917 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Wed, 1 Apr 2026 17:50:15 +0800 Subject: [PATCH 11/22] fix(test): add retry and configurable timeout for attestation chain verification --- tests/app/verify_attestation_chain.py | 58 +++++++++++++++++++-------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index 69fd55c..c0e904d 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -3,11 +3,15 @@ import hashlib import secrets import requests +from requests.exceptions import ReadTimeout BASE_URL = os.environ.get("BASE_URL", "").rstrip("/") API_KEY = os.environ.get("API_KEY", "") MODEL_NAME = os.environ.get("MODEL_NAME", "") SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower() +CONNECT_TIMEOUT = int(os.environ.get("CONNECT_TIMEOUT", "15")) +READ_TIMEOUT = int(os.environ.get("READ_TIMEOUT", "300")) +MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3")) def _canonical_json(obj) -> str: @@ -22,23 +26,43 @@ def main(): nonce = secrets.token_hex(32) url = f"{BASE_URL}/v1/attestation/chain" - resp = requests.get( - url, - params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO}, - headers={"Authorization": f"Bearer {API_KEY}"}, - timeout=90, - ) - - print("status:", resp.status_code) - if resp.status_code == 429: - print("body:", resp.text) - raise RuntimeError( - "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). " - "Wait and retry, or reduce verification call frequency." - ) - if resp.status_code >= 400: - print("body:", resp.text) - resp.raise_for_status() + + resp = None + for attempt in range(1, MAX_RETRIES + 1): + try: + resp = requests.get( + url, + params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO}, + headers={"Authorization": f"Bearer {API_KEY}"}, + timeout=(CONNECT_TIMEOUT, READ_TIMEOUT), + ) + except ReadTimeout: + if attempt == MAX_RETRIES: + raise RuntimeError( + f"Read timeout after {MAX_RETRIES} attempts (connect={CONNECT_TIMEOUT}s, read={READ_TIMEOUT}s)." + ) + print(f"attempt {attempt}/{MAX_RETRIES} timed out, retrying...") + continue + + print("status:", resp.status_code) + if resp.status_code == 429: + retry_after = resp.headers.get("Retry-After") + print("body:", resp.text) + if attempt == MAX_RETRIES: + raise RuntimeError( + "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). " + f"Retry-After={retry_after}." + ) + print(f"attempt {attempt}/{MAX_RETRIES} got 429, retrying...") + continue + + if resp.status_code >= 400: + print("body:", resp.text) + resp.raise_for_status() + break + + if resp is None: + raise RuntimeError("No response received") data = resp.json() From 84021eb2213cf91e329d05739e9b6b33f5046f16 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 09:55:20 +0800 Subject: [PATCH 12/22] test(attestation): verify binding_proof signature against proxy signing identity --- tests/app/verify_attestation_chain.py | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index c0e904d..d90be17 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -4,6 +4,9 @@ import secrets import requests from requests.exceptions import ReadTimeout +from eth_account import Account +from eth_account.messages import encode_defunct +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey BASE_URL = os.environ.get("BASE_URL", "").rstrip("/") API_KEY = os.environ.get("API_KEY", "") @@ -18,6 +21,37 @@ def _canonical_json(obj) -> str: return json.dumps(obj, sort_keys=True, separators=(",", ":")) +def _verify_binding_signature(binding_proof: dict): + payload_text = _canonical_json(binding_proof["payload"]) + signing_algo = binding_proof.get("signing_algo") + signature = binding_proof.get("signature") + signing_address = binding_proof.get("signing_address") + + if signing_algo == "ecdsa": + if not signature or not signature.startswith("0x"): + raise RuntimeError("invalid ecdsa signature format") + recovered = Account.recover_message( + encode_defunct(text=payload_text), + signature=signature, + ) + if recovered.lower() != (signing_address or "").lower(): + raise RuntimeError( + f"binding signature mismatch: recovered={recovered}, expected={signing_address}" + ) + return recovered + + if signing_algo == "ed25519": + # In this project ed25519 signing_address is the raw public key hex. + pubkey_hex = signing_address or "" + if len(pubkey_hex) != 64: + raise RuntimeError("invalid ed25519 signing_address/public key") + pubkey = Ed25519PublicKey.from_public_bytes(bytes.fromhex(pubkey_hex)) + pubkey.verify(bytes.fromhex(signature), payload_text.encode("utf-8")) + return pubkey_hex + + raise RuntimeError(f"unsupported signing_algo: {signing_algo}") + + def main(): if not BASE_URL or not API_KEY or not MODEL_NAME: raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME") @@ -91,6 +125,14 @@ def main(): assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch" assert bp.get("signature"), "binding signature missing" + # Signature verification: proves the binding payload was signed by proxy signing identity. + recovered_signer = _verify_binding_signature(bp) + proxy_signing_address = proxy_att.get("signing_address") + if proxy_signing_address: + assert recovered_signer.lower() == proxy_signing_address.lower(), ( + f"proxy signing address mismatch: recovered={recovered_signer}, proxy={proxy_signing_address}" + ) + # Optional sanity: if upstream carries nonce, it should match upstream_nonce = upstream_att.get("request_nonce") or upstream_att.get("nonce") if upstream_nonce is not None: @@ -100,6 +142,7 @@ def main(): print("nonce:", nonce) print("model:", MODEL_NAME) print("signing_algo:", SIGNING_ALGO) + print("binding_signer:", recovered_signer) print("upstream_attestation_sha256:", upstream_hash) From 48d564d115c6931ad13d37e993a95ac76b62cea6 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 14:57:25 +0800 Subject: [PATCH 13/22] feat: add proxy-mode chutes verification with passthrough option --- src/app/api/v1/openai.py | 182 ++++++++++++++++++++++++-- tests/app/test_openai_e2ee.py | 124 ++++++++++++++++-- tests/app/verify_attestation_chain.py | 71 +++++----- 3 files changed, 323 insertions(+), 54 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index bae25c1..814589e 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -1,9 +1,9 @@ +import base64 import json import os import time -import hashlib from hashlib import sha256 -from typing import Optional +from typing import Any, Optional import httpx from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Header, Query @@ -368,6 +368,8 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce "e2e_pubkey": e2e_pubkey, "intel_quote": e.get("quote"), "gpu_evidence": e.get("gpu_evidence", []), + "gpu_tokens": e.get("gpu_tokens"), + "tdx_verification": e.get("tdx_verification"), "certificate": e.get("certificate"), } ) @@ -383,6 +385,109 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce }, None +def _decode_quote(quote_b64: str) -> bytes: + return base64.b64decode(quote_b64) + + +def _extract_td_attributes(quote_bytes: bytes) -> int: + body = quote_bytes[48 : 48 + 584] + td_attributes_hex = body[120:128].hex() + return int(td_attributes_hex, 16) + + +def _extract_report_data_sha256(quote_bytes: bytes) -> str: + td_report_bytes = quote_bytes[48:632] + report_data_hex = td_report_bytes[520:584].hex().lower() + return report_data_hex[:64] + + +def _decode_jwt_payload_without_verification(token: str) -> dict[str, Any]: + parts = token.split(".") + if len(parts) < 2: + raise ValueError("invalid_jwt_format") + payload = parts[1] + payload += "=" * (-len(payload) % 4) + decoded = base64.urlsafe_b64decode(payload.encode("utf-8")).decode("utf-8") + return json.loads(decoded) + + +def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any: + if "gpu_tokens" in attestation: + return attestation.get("gpu_tokens") + return attestation.get("gpu_evidence") + + +def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> list[str]: + errors: list[str] = [] + + quote_b64 = attestation.get("intel_quote") or attestation.get("quote") + e2e_pubkey = attestation.get("e2e_pubkey") + if not quote_b64: + return ["missing_intel_quote"] + if not e2e_pubkey: + return ["missing_e2e_pubkey"] + + try: + quote_bytes = _decode_quote(quote_b64) + except Exception: + return ["invalid_quote_base64"] + + tdx_result = (attestation.get("tdx_verification") or {}).get("result") or {} + tdx_status = tdx_result.get("status") + if tdx_status != "UpToDate": + errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}") + + try: + td_attributes = _extract_td_attributes(quote_bytes) + if td_attributes & 1: + errors.append("tdx_debug_mode_enabled") + except Exception: + errors.append("tdx_attributes_parse_failed") + + expected_report_data = sha256((nonce + e2e_pubkey).encode("utf-8")).hexdigest().lower() + actual_report_data = _extract_report_data_sha256(quote_bytes) + if actual_report_data != expected_report_data: + errors.append("report_data_binding_mismatch") + + gpu_tokens = _extract_gpu_tokens(attestation) + if isinstance(gpu_tokens, dict): + if gpu_tokens.get("error"): + errors.append("gpu_tokens_error") + tokens = gpu_tokens.get("tokens") + if tokens: + try: + platform_entry = tokens[0] + if not isinstance(platform_entry, list) or len(platform_entry) < 2: + errors.append("gpu_platform_token_format_invalid") + else: + platform_claims = _decode_jwt_payload_without_verification(platform_entry[1]) + if platform_claims.get("x-nvidia-overall-att-result") is not True: + errors.append("gpu_overall_attestation_failed") + if platform_claims.get("eat_nonce") != expected_report_data: + errors.append("gpu_eat_nonce_mismatch") + except Exception: + errors.append("gpu_tokens_parse_failed") + + return errors + + +def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]: + details: list[dict[str, Any]] = [] + attestations = attestation_bundle.get("all_attestations") or [] + if not attestations: + return False, [{"instance_id": None, "errors": ["missing_all_attestations"]}] + + all_ok = True + for att in attestations: + instance_id = att.get("instance_id") + att_errors = _verify_single_chutes_attestation(att, nonce) + if att_errors: + all_ok = False + details.append({"instance_id": instance_id, "errors": att_errors}) + + return all_ok, details + + # Get attestation report of intel quote and nvidia payload @router.get("/attestation/report", dependencies=[Depends(verify_authorization_header)]) async def attestation_report( @@ -408,6 +513,7 @@ async def attestation_chain( model: str = Query(...), nonce: str = Query(...), signing_algo: str | None = None, + verify_mode: str = Query("proxy"), ): if not CHUTES_ENABLED: return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled") @@ -422,6 +528,10 @@ async def attestation_chain( nonce = nonce.strip() model = model.strip() + mode = verify_mode.strip().lower() + if mode not in {"proxy", "passthrough"}: + return error(status_code=400, message="verify_mode must be one of: proxy, passthrough", type="invalid_verify_mode") + if len(nonce) < 16: return error( status_code=400, @@ -449,7 +559,9 @@ async def attestation_chain( return upstream_error upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":")) - upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest() + upstream_attestation_sha256 = sha256(upstream_raw.encode("utf-8")).hexdigest() + + context = ecdsa_context if algo == ECDSA else ed25519_context binding_payload = { "nonce": nonce, @@ -460,24 +572,66 @@ async def attestation_chain( "upstream_attestation_sha256": upstream_attestation_sha256, } binding_text = json.dumps(binding_payload, sort_keys=True, separators=(",", ":")) - context = ecdsa_context if algo == ECDSA else ed25519_context + binding_proof = { + "payload": binding_payload, + "signature": sign_message(context, binding_text), + "signing_algo": algo, + "signing_address": context.signing_address, + } + + if mode == "passthrough": + return { + "version": "1", + "verify_mode": mode, + "proxy": { + "attestation": proxy_attestation, + "signing_public_key": proxy_attestation.get("signing_public_key"), + }, + "upstream": { + "provider": "chutes", + "base_url": CHUTES_ATTESTATION_BASE_URL, + "model": model, + "attestation": upstream_attestation, + "attestation_sha256": upstream_attestation_sha256, + }, + "binding_proof": binding_proof, + } + + verified, verification_details = _verify_chutes_attestation_bundle(upstream_attestation, nonce) + if not verified: + return error( + status_code=502, + message=f"Chutes attestation verification failed in proxy mode: {json.dumps(verification_details, separators=(',', ':'))}", + type="chutes_verification_failed", + ) + + receipt_payload = { + "nonce": nonce, + "request_hash": sha256(f"{model}:{nonce}".encode("utf-8")).hexdigest(), + "provider": "chutes", + "model": model, + "verify_mode": mode, + "verification_policy": "chutes-v1", + "verification_policy_version": "1", + "upstream_attestation_sha256": upstream_attestation_sha256, + "binding_signature": binding_proof["signature"], + "binding_signing_algo": binding_proof["signing_algo"], + "binding_signing_address": binding_proof["signing_address"], + "verified_at": int(time.time()), + "result": "pass", + } + receipt_text = json.dumps(receipt_payload, sort_keys=True, separators=(",", ":")) return { "version": "1", + "verify_mode": mode, "proxy": { "attestation": proxy_attestation, "signing_public_key": proxy_attestation.get("signing_public_key"), }, - "upstream": { - "provider": "chutes", - "base_url": CHUTES_ATTESTATION_BASE_URL, - "model": model, - "attestation": upstream_attestation, - "attestation_sha256": upstream_attestation_sha256, - }, - "binding_proof": { - "payload": binding_payload, - "signature": sign_message(context, binding_text), + "verification_receipt": { + "payload": receipt_payload, + "signature": sign_message(context, receipt_text), "signing_algo": algo, "signing_address": context.signing_address, }, diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 46ae695..1ee3607 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -1,4 +1,7 @@ +import base64 import hashlib +import json + import httpx import pytest from fastapi.testclient import TestClient @@ -335,11 +338,27 @@ def test_attestation_report_includes_signing_public_key(): assert data["all_attestations"][0]["signing_public_key"] == data["signing_public_key"] +def _make_chutes_quote_b64(nonce: str, e2e_pubkey: str, *, debug_enabled: bool = False) -> str: + quote_bytes = bytearray(700) + + td_attributes_offset = 48 + 120 + quote_bytes[td_attributes_offset : td_attributes_offset + 8] = (1 if debug_enabled else 0).to_bytes(8, "little") + + report_data_offset = 48 + 520 + report_data_hex = hashlib.sha256((nonce + e2e_pubkey).encode("utf-8")).hexdigest() + report_data_bytes = bytes.fromhex(report_data_hex) + bytes(32) + quote_bytes[report_data_offset : report_data_offset + 64] = report_data_bytes + + return base64.b64encode(bytes(quote_bytes)).decode("utf-8") + + @pytest.mark.asyncio @pytest.mark.respx -async def test_attestation_chain_success(respx_mock): +async def test_attestation_chain_success_proxy_mode(respx_mock): model = "moonshotai/Kimi-K2.5-TEE" nonce = "a" * 16 + e2e_pubkey = "pk-1" + quote_b64 = _make_chutes_quote_b64(nonce, e2e_pubkey) respx_mock.get("https://api.chutes.ai/chutes/").mock( return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-123"}]}) @@ -347,7 +366,7 @@ async def test_attestation_chain_success(respx_mock): respx_mock.get("https://api.chutes.ai/e2e/instances/chute-123").mock( return_value=httpx.Response( 200, - json={"instances": [{"instance_id": "inst-1", "e2e_pubkey": "pk-1"}]}, + json={"instances": [{"instance_id": "inst-1", "e2e_pubkey": e2e_pubkey}]}, ) ) respx_mock.get("https://api.chutes.ai/chutes/chute-123/evidence").mock( @@ -357,8 +376,8 @@ async def test_attestation_chain_success(respx_mock): "evidence": [ { "instance_id": "inst-1", - "quote": "intel-quote", - "gpu_evidence": [{"gpu": "ok"}], + "quote": quote_b64, + "tdx_verification": {"result": {"status": "UpToDate"}}, "certificate": "cert", } ] @@ -376,17 +395,94 @@ async def test_attestation_chain_success(respx_mock): assert response.status_code == 200 data = response.json() assert data["version"] == "1" + assert data["verify_mode"] == "proxy" assert data["proxy"]["attestation"]["request_nonce"] == nonce - assert data["upstream"]["attestation"]["attestation_type"] == "chutes" - assert data["upstream"]["attestation"]["chute_id"] == "chute-123" - assert data["upstream"]["attestation"]["all_attestations"][0]["instance_id"] == "inst-1" + assert "verification_receipt" in data + assert data["verification_receipt"]["payload"]["result"] == "pass" + assert data["verification_receipt"]["payload"]["model"] == model + + +@pytest.mark.asyncio +@pytest.mark.respx +async def test_attestation_chain_passthrough_mode_returns_upstream_bundle(respx_mock): + model = "moonshotai/Kimi-K2.5-TEE" + nonce = "b" * 16 + e2e_pubkey = "pk-2" + quote_b64 = _make_chutes_quote_b64(nonce, e2e_pubkey) + + respx_mock.get("https://api.chutes.ai/chutes/").mock( + return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-321"}]}) + ) + respx_mock.get("https://api.chutes.ai/e2e/instances/chute-321").mock( + return_value=httpx.Response( + 200, + json={"instances": [{"instance_id": "inst-2", "e2e_pubkey": e2e_pubkey}]}, + ) + ) + respx_mock.get("https://api.chutes.ai/chutes/chute-321/evidence").mock( + return_value=httpx.Response( + 200, + json={ + "evidence": [ + { + "instance_id": "inst-2", + "quote": quote_b64, + "tdx_verification": {"result": {"status": "UpToDate"}}, + "certificate": "cert", + } + ] + }, + ) + ) + + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + response = client.get( + "/v1/attestation/chain", + params={"model": model, "nonce": nonce, "signing_algo": "ecdsa", "verify_mode": "passthrough"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + assert response.status_code == 200 + data = response.json() + assert data["verify_mode"] == "passthrough" expected_hash = hashlib.sha256( json.dumps(data["upstream"]["attestation"], sort_keys=True, separators=(",", ":")).encode("utf-8") ).hexdigest() assert data["upstream"]["attestation_sha256"] == expected_hash assert data["binding_proof"]["payload"]["upstream_attestation_sha256"] == expected_hash - assert data["binding_proof"]["payload"]["model"] == model + + +def test_attestation_chain_proxy_mode_verification_failure_returns_502(): + model = "moonshotai/Kimi-K2.5-TEE" + nonce = "a" * 16 + + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch( + "app.api.v1.openai._fetch_chutes_attestation", + return_value=( + { + "attestation_type": "chutes", + "nonce": nonce, + "chute_id": "chute-123", + "all_attestations": [ + { + "instance_id": "inst-1", + "e2e_pubkey": "pk-1", + "intel_quote": "bad-quote", + "tdx_verification": {"result": {"status": "OutOfDate"}}, + } + ], + }, + None, + ), + ): + response = client.get( + "/v1/attestation/chain", + params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 502 + assert response.json()["error"]["type"] == "chutes_verification_failed" def test_attestation_chain_nonce_too_short(): @@ -401,6 +497,18 @@ def test_attestation_chain_nonce_too_short(): assert response.json()["error"]["type"] == "invalid_nonce" +def test_attestation_chain_invalid_verify_mode(): + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + response = client.get( + "/v1/attestation/chain", + params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "a" * 16, "signing_algo": "ecdsa", "verify_mode": "bad"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 400 + assert response.json()["error"]["type"] == "invalid_verify_mode" + + def test_attestation_chain_model_empty(): with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): response = client.get( diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index d90be17..ad03f5f 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -15,6 +15,7 @@ CONNECT_TIMEOUT = int(os.environ.get("CONNECT_TIMEOUT", "15")) READ_TIMEOUT = int(os.environ.get("READ_TIMEOUT", "300")) MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3")) +VERIFY_MODE = os.environ.get("VERIFY_MODE", "proxy").lower() def _canonical_json(obj) -> str: @@ -66,7 +67,7 @@ def main(): try: resp = requests.get( url, - params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO}, + params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO, "verify_mode": VERIFY_MODE}, headers={"Authorization": f"Bearer {API_KEY}"}, timeout=(CONNECT_TIMEOUT, READ_TIMEOUT), ) @@ -102,48 +103,54 @@ def main(): # Top-level structure checks assert data.get("version") == "1", "unexpected chain version" - assert "proxy" in data and "upstream" in data and "binding_proof" in data, "missing chain sections" + assert data.get("verify_mode") == VERIFY_MODE, f"unexpected verify_mode: {data.get('verify_mode')}" + assert "proxy" in data, "missing proxy section" - # Proxy section proxy = data["proxy"] proxy_att = proxy["attestation"] assert proxy.get("signing_public_key"), "missing proxy.signing_public_key" assert proxy_att.get("signing_public_key") == proxy["signing_public_key"], "proxy signing_public_key mismatch" - # Upstream hash consistency - upstream = data["upstream"] - upstream_att = upstream["attestation"] - upstream_hash = hashlib.sha256(_canonical_json(upstream_att).encode("utf-8")).hexdigest() - assert upstream.get("attestation_sha256") == upstream_hash, "upstream attestation hash mismatch" - - # Binding payload consistency - bp = data["binding_proof"] - payload = bp["payload"] - assert payload.get("nonce") == nonce, "binding payload nonce mismatch" - assert payload.get("model") == MODEL_NAME, "binding payload model mismatch" - assert payload.get("upstream_attestation_sha256") == upstream_hash, "binding payload hash mismatch" - assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch" - assert bp.get("signature"), "binding signature missing" - - # Signature verification: proves the binding payload was signed by proxy signing identity. - recovered_signer = _verify_binding_signature(bp) - proxy_signing_address = proxy_att.get("signing_address") - if proxy_signing_address: - assert recovered_signer.lower() == proxy_signing_address.lower(), ( - f"proxy signing address mismatch: recovered={recovered_signer}, proxy={proxy_signing_address}" - ) + if VERIFY_MODE == "passthrough": + assert "upstream" in data and "binding_proof" in data, "missing passthrough sections" + upstream = data["upstream"] + upstream_att = upstream["attestation"] + upstream_hash = hashlib.sha256(_canonical_json(upstream_att).encode("utf-8")).hexdigest() + assert upstream.get("attestation_sha256") == upstream_hash, "upstream attestation hash mismatch" + + bp = data["binding_proof"] + payload = bp["payload"] + assert payload.get("nonce") == nonce, "binding payload nonce mismatch" + assert payload.get("model") == MODEL_NAME, "binding payload model mismatch" + assert payload.get("upstream_attestation_sha256") == upstream_hash, "binding payload hash mismatch" + assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch" + assert bp.get("signature"), "binding signature missing" + + recovered_signer = _verify_binding_signature(bp) + proxy_signing_address = proxy_att.get("signing_address") + if proxy_signing_address: + assert recovered_signer.lower() == proxy_signing_address.lower(), ( + f"proxy signing address mismatch: recovered={recovered_signer}, proxy={proxy_signing_address}" + ) + + print("[OK] /v1/attestation/chain passthrough validated") + print("binding_signer:", recovered_signer) + print("upstream_attestation_sha256:", upstream_hash) + else: + assert "verification_receipt" in data, "missing verification_receipt" + receipt = data["verification_receipt"] + assert receipt.get("signature"), "receipt signature missing" + payload = receipt.get("payload") or {} + assert payload.get("result") == "pass", "receipt result is not pass" + assert payload.get("model") == MODEL_NAME, "receipt model mismatch" + assert payload.get("nonce") == nonce, "receipt nonce mismatch" - # Optional sanity: if upstream carries nonce, it should match - upstream_nonce = upstream_att.get("request_nonce") or upstream_att.get("nonce") - if upstream_nonce is not None: - assert upstream_nonce == nonce, "upstream nonce does not match request nonce" + print("[OK] /v1/attestation/chain proxy mode validated") - print("[OK] /v1/attestation/chain validated") print("nonce:", nonce) print("model:", MODEL_NAME) print("signing_algo:", SIGNING_ALGO) - print("binding_signer:", recovered_signer) - print("upstream_attestation_sha256:", upstream_hash) + print("verify_mode:", VERIFY_MODE) if __name__ == "__main__": From f1abb378924668114a60bfe4f75796e625aa295f Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 15:15:18 +0800 Subject: [PATCH 14/22] fix: tighten chutes proxy verification parity checks --- src/app/api/v1/openai.py | 9 +++++++-- tests/app/test_openai_e2ee.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 814589e..e4be1f5 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -386,7 +386,7 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce def _decode_quote(quote_b64: str) -> bytes: - return base64.b64decode(quote_b64) + return base64.b64decode(quote_b64, validate=True) def _extract_td_attributes(quote_bytes: bytes) -> int: @@ -432,7 +432,12 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - except Exception: return ["invalid_quote_base64"] - tdx_result = (attestation.get("tdx_verification") or {}).get("result") or {} + tdx_verification = attestation.get("tdx_verification") or {} + tdx_error = tdx_verification.get("error") + if tdx_error: + errors.append("tdx_online_verification_error") + + tdx_result = tdx_verification.get("result") or {} tdx_status = tdx_result.get("status") if tdx_status != "UpToDate": errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}") diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 1ee3607..f520125 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -497,6 +497,39 @@ def test_attestation_chain_nonce_too_short(): assert response.json()["error"]["type"] == "invalid_nonce" +def test_attestation_chain_proxy_mode_rejects_tdx_online_verification_error(): + model = "moonshotai/Kimi-K2.5-TEE" + nonce = "c" * 16 + + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch( + "app.api.v1.openai._fetch_chutes_attestation", + return_value=( + { + "attestation_type": "chutes", + "nonce": nonce, + "chute_id": "chute-123", + "all_attestations": [ + { + "instance_id": "inst-1", + "e2e_pubkey": "pk-1", + "intel_quote": _make_chutes_quote_b64(nonce, "pk-1"), + "tdx_verification": {"error": "upstream verifier timeout", "result": {"status": "UpToDate"}}, + } + ], + }, + None, + ), + ): + response = client.get( + "/v1/attestation/chain", + params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 502 + assert response.json()["error"]["type"] == "chutes_verification_failed" + + def test_attestation_chain_invalid_verify_mode(): with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): response = client.get( From ac1e3ca62df2328c5285395ef55a059eef45f438 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 17:01:35 +0800 Subject: [PATCH 15/22] fix: align proxy mode with optional chutes tdx status field --- src/app/api/v1/openai.py | 9 ++++---- tests/app/test_openai_e2ee.py | 43 +++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index e4be1f5..ba69d6f 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -437,10 +437,11 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - if tdx_error: errors.append("tdx_online_verification_error") - tdx_result = tdx_verification.get("result") or {} - tdx_status = tdx_result.get("status") - if tdx_status != "UpToDate": - errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}") + tdx_result = tdx_verification.get("result") + if tdx_result: + tdx_status = tdx_result.get("status") + if tdx_status != "UpToDate": + errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}") try: td_attributes = _extract_td_attributes(quote_bytes) diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index f520125..7b0e3ff 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -497,6 +497,49 @@ def test_attestation_chain_nonce_too_short(): assert response.json()["error"]["type"] == "invalid_nonce" +@pytest.mark.asyncio +@pytest.mark.respx +async def test_attestation_chain_proxy_mode_allows_missing_tdx_result_if_other_checks_pass(respx_mock): + model = "moonshotai/Kimi-K2.5-TEE" + nonce = "d" * 16 + e2e_pubkey = "pk-3" + quote_b64 = _make_chutes_quote_b64(nonce, e2e_pubkey) + + respx_mock.get("https://api.chutes.ai/chutes/").mock( + return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-777"}]}) + ) + respx_mock.get("https://api.chutes.ai/e2e/instances/chute-777").mock( + return_value=httpx.Response( + 200, + json={"instances": [{"instance_id": "inst-3", "e2e_pubkey": e2e_pubkey}]}, + ) + ) + respx_mock.get("https://api.chutes.ai/chutes/chute-777/evidence").mock( + return_value=httpx.Response( + 200, + json={ + "evidence": [ + { + "instance_id": "inst-3", + "quote": quote_b64, + "certificate": "cert", + } + ] + }, + ) + ) + + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + response = client.get( + "/v1/attestation/chain", + params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, + headers={"Authorization": TEST_AUTH_HEADER}, + ) + + assert response.status_code == 200 + assert response.json()["verification_receipt"]["payload"]["result"] == "pass" + + def test_attestation_chain_proxy_mode_rejects_tdx_online_verification_error(): model = "moonshotai/Kimi-K2.5-TEE" nonce = "c" * 16 From dc3242e46b4788ccdbce3be9f893fe7cef978cde Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 17:26:32 +0800 Subject: [PATCH 16/22] feat: enrich proxy attestation receipt and test both verify modes --- src/app/api/v1/openai.py | 63 +++++++++++++++++++++------ tests/app/test_openai_e2ee.py | 4 ++ tests/app/verify_attestation_chain.py | 51 ++++++++++++++-------- 3 files changed, 87 insertions(+), 31 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index ba69d6f..1ed51a3 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -417,34 +417,49 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any: return attestation.get("gpu_evidence") -def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> list[str]: +def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]: errors: list[str] = [] + detail: dict[str, Any] = { + "instance_id": attestation.get("instance_id"), + "tdx_status": "missing", + "tdx_error_present": False, + "debug_mode_disabled": False, + "binding_verified": False, + "gpu_token_checked": False, + "gpu_verified": None, + } quote_b64 = attestation.get("intel_quote") or attestation.get("quote") e2e_pubkey = attestation.get("e2e_pubkey") if not quote_b64: - return ["missing_intel_quote"] + detail["errors"] = ["missing_intel_quote"] + return detail if not e2e_pubkey: - return ["missing_e2e_pubkey"] + detail["errors"] = ["missing_e2e_pubkey"] + return detail try: quote_bytes = _decode_quote(quote_b64) except Exception: - return ["invalid_quote_base64"] + detail["errors"] = ["invalid_quote_base64"] + return detail tdx_verification = attestation.get("tdx_verification") or {} tdx_error = tdx_verification.get("error") if tdx_error: + detail["tdx_error_present"] = True errors.append("tdx_online_verification_error") tdx_result = tdx_verification.get("result") if tdx_result: - tdx_status = tdx_result.get("status") + tdx_status = tdx_result.get("status") or "missing" + detail["tdx_status"] = tdx_status if tdx_status != "UpToDate": - errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}") + errors.append(f"tdx_status_not_uptodate:{tdx_status}") try: td_attributes = _extract_td_attributes(quote_bytes) + detail["debug_mode_disabled"] = not (td_attributes & 1) if td_attributes & 1: errors.append("tdx_debug_mode_enabled") except Exception: @@ -452,29 +467,42 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - expected_report_data = sha256((nonce + e2e_pubkey).encode("utf-8")).hexdigest().lower() actual_report_data = _extract_report_data_sha256(quote_bytes) + detail["expected_report_data"] = expected_report_data + detail["actual_report_data"] = actual_report_data if actual_report_data != expected_report_data: errors.append("report_data_binding_mismatch") + else: + detail["binding_verified"] = True gpu_tokens = _extract_gpu_tokens(attestation) if isinstance(gpu_tokens, dict): if gpu_tokens.get("error"): + detail["gpu_token_checked"] = True + detail["gpu_verified"] = False errors.append("gpu_tokens_error") tokens = gpu_tokens.get("tokens") if tokens: + detail["gpu_token_checked"] = True try: platform_entry = tokens[0] if not isinstance(platform_entry, list) or len(platform_entry) < 2: + detail["gpu_verified"] = False errors.append("gpu_platform_token_format_invalid") else: platform_claims = _decode_jwt_payload_without_verification(platform_entry[1]) - if platform_claims.get("x-nvidia-overall-att-result") is not True: + overall_ok = platform_claims.get("x-nvidia-overall-att-result") is True + nonce_ok = platform_claims.get("eat_nonce") == expected_report_data + detail["gpu_verified"] = overall_ok and nonce_ok + if not overall_ok: errors.append("gpu_overall_attestation_failed") - if platform_claims.get("eat_nonce") != expected_report_data: + if not nonce_ok: errors.append("gpu_eat_nonce_mismatch") except Exception: + detail["gpu_verified"] = False errors.append("gpu_tokens_parse_failed") - return errors + detail["errors"] = errors + return detail def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]: @@ -485,11 +513,10 @@ def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: all_ok = True for att in attestations: - instance_id = att.get("instance_id") - att_errors = _verify_single_chutes_attestation(att, nonce) - if att_errors: + att_detail = _verify_single_chutes_attestation(att, nonce) + if att_detail.get("errors"): all_ok = False - details.append({"instance_id": instance_id, "errors": att_errors}) + details.append(att_detail) return all_ok, details @@ -611,6 +638,10 @@ async def attestation_chain( type="chutes_verification_failed", ) + total_instances = len(verification_details) + uptodate_instances = sum(1 for d in verification_details if d.get("tdx_status") == "UpToDate") + binding_verified_instances = sum(1 for d in verification_details if d.get("binding_verified") is True) + receipt_payload = { "nonce": nonce, "request_hash": sha256(f"{model}:{nonce}".encode("utf-8")).hexdigest(), @@ -625,6 +656,12 @@ async def attestation_chain( "binding_signing_address": binding_proof["signing_address"], "verified_at": int(time.time()), "result": "pass", + "verification_summary": { + "total_instances": total_instances, + "tdx_uptodate_instances": uptodate_instances, + "binding_verified_instances": binding_verified_instances, + }, + "instance_results": verification_details, } receipt_text = json.dumps(receipt_payload, sort_keys=True, separators=(",", ":")) diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 7b0e3ff..9cb87e3 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -400,6 +400,10 @@ async def test_attestation_chain_success_proxy_mode(respx_mock): assert "verification_receipt" in data assert data["verification_receipt"]["payload"]["result"] == "pass" assert data["verification_receipt"]["payload"]["model"] == model + summary = data["verification_receipt"]["payload"]["verification_summary"] + assert summary["total_instances"] == 1 + assert summary["binding_verified_instances"] == 1 + assert len(data["verification_receipt"]["payload"]["instance_results"]) == 1 @pytest.mark.asyncio diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index ad03f5f..d4ac426 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -15,7 +15,7 @@ CONNECT_TIMEOUT = int(os.environ.get("CONNECT_TIMEOUT", "15")) READ_TIMEOUT = int(os.environ.get("READ_TIMEOUT", "300")) MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3")) -VERIFY_MODE = os.environ.get("VERIFY_MODE", "proxy").lower() +VERIFY_MODE = os.environ.get("VERIFY_MODE", "both").lower() def _canonical_json(obj) -> str: @@ -53,13 +53,8 @@ def _verify_binding_signature(binding_proof: dict): raise RuntimeError(f"unsupported signing_algo: {signing_algo}") -def main(): - if not BASE_URL or not API_KEY or not MODEL_NAME: - raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME") - - # Use 32-byte hex nonce to match attestation implementations that expect hex challenge. +def _run_mode(verify_mode: str): nonce = secrets.token_hex(32) - url = f"{BASE_URL}/v1/attestation/chain" resp = None @@ -67,7 +62,7 @@ def main(): try: resp = requests.get( url, - params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO, "verify_mode": VERIFY_MODE}, + params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO, "verify_mode": verify_mode}, headers={"Authorization": f"Bearer {API_KEY}"}, timeout=(CONNECT_TIMEOUT, READ_TIMEOUT), ) @@ -76,23 +71,23 @@ def main(): raise RuntimeError( f"Read timeout after {MAX_RETRIES} attempts (connect={CONNECT_TIMEOUT}s, read={READ_TIMEOUT}s)." ) - print(f"attempt {attempt}/{MAX_RETRIES} timed out, retrying...") + print(f"[{verify_mode}] attempt {attempt}/{MAX_RETRIES} timed out, retrying...") continue - print("status:", resp.status_code) + print(f"[{verify_mode}] status:", resp.status_code) if resp.status_code == 429: retry_after = resp.headers.get("Retry-After") - print("body:", resp.text) + print(f"[{verify_mode}] body:", resp.text) if attempt == MAX_RETRIES: raise RuntimeError( "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). " f"Retry-After={retry_after}." ) - print(f"attempt {attempt}/{MAX_RETRIES} got 429, retrying...") + print(f"[{verify_mode}] attempt {attempt}/{MAX_RETRIES} got 429, retrying...") continue if resp.status_code >= 400: - print("body:", resp.text) + print(f"[{verify_mode}] body:", resp.text) resp.raise_for_status() break @@ -100,10 +95,8 @@ def main(): raise RuntimeError("No response received") data = resp.json() - - # Top-level structure checks assert data.get("version") == "1", "unexpected chain version" - assert data.get("verify_mode") == VERIFY_MODE, f"unexpected verify_mode: {data.get('verify_mode')}" + assert data.get("verify_mode") == verify_mode, f"unexpected verify_mode: {data.get('verify_mode')}" assert "proxy" in data, "missing proxy section" proxy = data["proxy"] @@ -111,7 +104,7 @@ def main(): assert proxy.get("signing_public_key"), "missing proxy.signing_public_key" assert proxy_att.get("signing_public_key") == proxy["signing_public_key"], "proxy signing_public_key mismatch" - if VERIFY_MODE == "passthrough": + if verify_mode == "passthrough": assert "upstream" in data and "binding_proof" in data, "missing passthrough sections" upstream = data["upstream"] upstream_att = upstream["attestation"] @@ -145,12 +138,34 @@ def main(): assert payload.get("model") == MODEL_NAME, "receipt model mismatch" assert payload.get("nonce") == nonce, "receipt nonce mismatch" + summary = payload.get("verification_summary") or {} + instance_results = payload.get("instance_results") or [] + assert summary.get("total_instances") == len(instance_results), "summary total_instances mismatch" + assert summary.get("binding_verified_instances", 0) >= 1, "expected at least one binding-verified instance" + print("[OK] /v1/attestation/chain proxy mode validated") + print("verification_summary:", json.dumps(summary, ensure_ascii=False)) + print("instance_results_preview:", json.dumps(instance_results[:2], ensure_ascii=False)) print("nonce:", nonce) print("model:", MODEL_NAME) print("signing_algo:", SIGNING_ALGO) - print("verify_mode:", VERIFY_MODE) + print("verify_mode:", verify_mode) + + +def main(): + if not BASE_URL or not API_KEY or not MODEL_NAME: + raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME") + + if VERIFY_MODE == "both": + _run_mode("proxy") + _run_mode("passthrough") + return + + if VERIFY_MODE not in {"proxy", "passthrough"}: + raise RuntimeError("VERIFY_MODE must be one of: proxy, passthrough, both") + + _run_mode(VERIFY_MODE) if __name__ == "__main__": From 5eb1e20f1cf5a118aa180bfeb1ee78f742e7426a Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 20:13:53 +0800 Subject: [PATCH 17/22] fix: make chain verifier script schema-tolerant for summary fields --- tests/app/verify_attestation_chain.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index d4ac426..46f3bed 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -140,11 +140,21 @@ def _run_mode(verify_mode: str): summary = payload.get("verification_summary") or {} instance_results = payload.get("instance_results") or [] - assert summary.get("total_instances") == len(instance_results), "summary total_instances mismatch" - assert summary.get("binding_verified_instances", 0) >= 1, "expected at least one binding-verified instance" + + summary_total = summary.get("total_instances") + if summary_total is not None and instance_results: + assert summary_total == len(instance_results), ( + f"summary total_instances mismatch: summary={summary_total}, instance_results={len(instance_results)}" + ) + elif summary_total is not None and not instance_results: + print("[WARN] verification_summary exists but instance_results missing/empty. This may indicate an older server build.") + + if instance_results: + assert summary.get("binding_verified_instances", 0) >= 1, "expected at least one binding-verified instance" print("[OK] /v1/attestation/chain proxy mode validated") print("verification_summary:", json.dumps(summary, ensure_ascii=False)) + print("instance_results_count:", len(instance_results)) print("instance_results_preview:", json.dumps(instance_results[:2], ensure_ascii=False)) print("nonce:", nonce) From f5b94970338870748f175732b2bab237c6f96d16 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 21:01:30 +0800 Subject: [PATCH 18/22] feat: enforce online TDX verification in proxy mode --- src/app/api/v1/openai.py | 32 +++++++++++++++++++++++++-- tests/app/test_openai_e2ee.py | 15 ++++++++----- tests/app/verify_attestation_chain.py | 2 +- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index 1ed51a3..f545e66 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -1,7 +1,9 @@ +import asyncio import base64 import json import os import time +from concurrent.futures import ThreadPoolExecutor from hashlib import sha256 from typing import Any, Optional @@ -417,6 +419,30 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any: return attestation.get("gpu_evidence") +def _verify_tdx_online(quote_b64: str) -> dict[str, Any]: + try: + import dcap_qvl + + quote_bytes = _decode_quote(quote_b64) + + def run_verification(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(dcap_qvl.get_collateral_and_verify(quote_bytes)) + finally: + loop.close() + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(run_verification) + verified_report = future.result(timeout=30) + + result = json.loads(verified_report.to_json()) + return {"result": result, "error": None} + except Exception as exc: + return {"result": None, "error": str(exc)} + + def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]: errors: list[str] = [] detail: dict[str, Any] = { @@ -444,14 +470,16 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - detail["errors"] = ["invalid_quote_base64"] return detail - tdx_verification = attestation.get("tdx_verification") or {} + tdx_verification = _verify_tdx_online(quote_b64) tdx_error = tdx_verification.get("error") if tdx_error: detail["tdx_error_present"] = True errors.append("tdx_online_verification_error") tdx_result = tdx_verification.get("result") - if tdx_result: + if not tdx_result: + errors.append("tdx_status_missing") + else: tdx_status = tdx_result.get("status") or "missing" detail["tdx_status"] = tdx_status if tdx_status != "UpToDate": diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py index 9cb87e3..441e0dd 100644 --- a/tests/app/test_openai_e2ee.py +++ b/tests/app/test_openai_e2ee.py @@ -385,7 +385,10 @@ async def test_attestation_chain_success_proxy_mode(respx_mock): ) ) - with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch( + "app.api.v1.openai._verify_tdx_online", + return_value={"result": {"status": "UpToDate"}, "error": None}, + ): response = client.get( "/v1/attestation/chain", params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, @@ -503,7 +506,7 @@ def test_attestation_chain_nonce_too_short(): @pytest.mark.asyncio @pytest.mark.respx -async def test_attestation_chain_proxy_mode_allows_missing_tdx_result_if_other_checks_pass(respx_mock): +async def test_attestation_chain_proxy_mode_uses_online_tdx_verification(respx_mock): model = "moonshotai/Kimi-K2.5-TEE" nonce = "d" * 16 e2e_pubkey = "pk-3" @@ -533,7 +536,10 @@ async def test_attestation_chain_proxy_mode_allows_missing_tdx_result_if_other_c ) ) - with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"): + with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch( + "app.api.v1.openai._verify_tdx_online", + return_value={"result": {"status": "UpToDate"}, "error": None}, + ): response = client.get( "/v1/attestation/chain", params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, @@ -560,13 +566,12 @@ def test_attestation_chain_proxy_mode_rejects_tdx_online_verification_error(): "instance_id": "inst-1", "e2e_pubkey": "pk-1", "intel_quote": _make_chutes_quote_b64(nonce, "pk-1"), - "tdx_verification": {"error": "upstream verifier timeout", "result": {"status": "UpToDate"}}, } ], }, None, ), - ): + ), patch("app.api.v1.openai._verify_tdx_online", return_value={"result": None, "error": "tdx verify failed"}): response = client.get( "/v1/attestation/chain", params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"}, diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py index 46f3bed..a99d853 100644 --- a/tests/app/verify_attestation_chain.py +++ b/tests/app/verify_attestation_chain.py @@ -155,7 +155,7 @@ def _run_mode(verify_mode: str): print("[OK] /v1/attestation/chain proxy mode validated") print("verification_summary:", json.dumps(summary, ensure_ascii=False)) print("instance_results_count:", len(instance_results)) - print("instance_results_preview:", json.dumps(instance_results[:2], ensure_ascii=False)) + print("instance_results:", json.dumps(instance_results, ensure_ascii=False)) print("nonce:", nonce) print("model:", MODEL_NAME) From 0d20af95ebd93108240e677f6e36fb7fa4bcd682 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Thu, 2 Apr 2026 21:14:39 +0800 Subject: [PATCH 19/22] chore: add dcap-qvl dependency for online TDX verification --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 3b6d536..0fdcbea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ dstack-sdk==0.5.3 cryptography==43.0.1 redis==5.2.1 nv-ppcie-verifier==1.5.0 +dcap-qvl>=0.3.13 From fc33de4ae71c627a533371d8427c9eeff8cff17d Mon Sep 17 00:00:00 2001 From: mondaylord Date: Fri, 3 Apr 2026 11:42:23 +0800 Subject: [PATCH 20/22] chore: refine online TDX verification and unify upstream error handling --- src/app/api/v1/openai.py | 41 +++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index f545e66..e2fd2b7 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -62,6 +62,9 @@ CHUTES_CHUTE_ID_CACHE: dict[str, tuple[str, float]] = {} CHUTES_CHUTE_ID_CACHE_TTL_SECONDS = int(os.getenv("CHUTES_CHUTE_ID_CACHE_TTL_SECONDS", "3600")) +# Shared executor for online TDX verification to avoid per-attestation thread creation. +TDX_EXECUTOR = ThreadPoolExecutor(max_workers=int(os.getenv("TDX_ONLINE_WORKERS", "4"))) + TIMEOUT = 60 * 10 COMMON_HEADERS = {"Content-Type": "application/json", "Accept": "application/json"} @@ -314,7 +317,11 @@ async def _resolve_chute_id(client: httpx.AsyncClient, model: str) -> str | dict if resp.status_code == 429: return _error_from_upstream_429(resp) if resp.status_code != 200: - raise HTTPException(status_code=resp.status_code, detail=resp.text) + return error( + status_code=502, + message=f"Failed to lookup chute by name: {resp.status_code} {resp.text}", + type="upstream_http_error", + ) data = resp.json() items = data.get("items") or [] @@ -339,7 +346,11 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce if e2e_resp.status_code == 429: return None, _error_from_upstream_429(e2e_resp) if e2e_resp.status_code != 200: - raise HTTPException(status_code=e2e_resp.status_code, detail=e2e_resp.text) + return None, error( + status_code=502, + message=f"Failed to fetch E2E public keys: {e2e_resp.status_code} {e2e_resp.text}", + type="upstream_http_error", + ) e2e_data = e2e_resp.json() instances = e2e_data.get("instances") or [] @@ -352,7 +363,11 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce if evidence_resp.status_code == 429: return None, _error_from_upstream_429(evidence_resp) if evidence_resp.status_code != 200: - raise HTTPException(status_code=evidence_resp.status_code, detail=evidence_resp.text) + return None, error( + status_code=502, + message=f"Failed to fetch evidence: {evidence_resp.status_code} {evidence_resp.text}", + type="upstream_http_error", + ) evidence_data = evidence_resp.json() evidence_list = evidence_data.get("evidence") or [] @@ -419,13 +434,17 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any: return attestation.get("gpu_evidence") -def _verify_tdx_online(quote_b64: str) -> dict[str, Any]: +def _verify_tdx_online(quote: str | bytes) -> dict[str, Any]: + """Run online TDX verification via dcap_qvl in a shared thread pool. + + Accepts either base64-encoded quote (str) or raw bytes. + """ try: import dcap_qvl - quote_bytes = _decode_quote(quote_b64) + quote_bytes = _decode_quote(quote) if isinstance(quote, str) else quote - def run_verification(): + def run_verification() -> Any: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: @@ -433,9 +452,8 @@ def run_verification(): finally: loop.close() - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_verification) - verified_report = future.result(timeout=30) + future = TDX_EXECUTOR.submit(run_verification) + verified_report = future.result(timeout=30) result = json.loads(verified_report.to_json()) return {"result": result, "error": None} @@ -470,7 +488,7 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - detail["errors"] = ["invalid_quote_base64"] return detail - tdx_verification = _verify_tdx_online(quote_b64) + tdx_verification = _verify_tdx_online(quote_bytes) tdx_error = tdx_verification.get("error") if tdx_error: detail["tdx_error_present"] = True @@ -478,7 +496,8 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - tdx_result = tdx_verification.get("result") if not tdx_result: - errors.append("tdx_status_missing") + if not tdx_error: + errors.append("tdx_status_missing") else: tdx_status = tdx_result.get("status") or "missing" detail["tdx_status"] = tdx_status From 771d73b9ea87809ce4eaf0fda1323776b54636fb Mon Sep 17 00:00:00 2001 From: mondaylord Date: Fri, 3 Apr 2026 12:31:49 +0800 Subject: [PATCH 21/22] chutes: use 8 TDX online workers by default --- src/app/api/v1/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index e2fd2b7..bef5ac0 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -63,7 +63,7 @@ CHUTES_CHUTE_ID_CACHE_TTL_SECONDS = int(os.getenv("CHUTES_CHUTE_ID_CACHE_TTL_SECONDS", "3600")) # Shared executor for online TDX verification to avoid per-attestation thread creation. -TDX_EXECUTOR = ThreadPoolExecutor(max_workers=int(os.getenv("TDX_ONLINE_WORKERS", "4"))) +TDX_EXECUTOR = ThreadPoolExecutor(max_workers=int(os.getenv("TDX_ONLINE_WORKERS", "8"))) TIMEOUT = 60 * 10 From 171d089b9c82d36730c56008e78af9ea05bebcd5 Mon Sep 17 00:00:00 2001 From: mondaylord Date: Fri, 3 Apr 2026 12:39:10 +0800 Subject: [PATCH 22/22] chutes: async TDX verification and trimmed error message --- src/app/api/v1/openai.py | 55 ++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py index bef5ac0..ce543d7 100644 --- a/src/app/api/v1/openai.py +++ b/src/app/api/v1/openai.py @@ -435,7 +435,7 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any: def _verify_tdx_online(quote: str | bytes) -> dict[str, Any]: - """Run online TDX verification via dcap_qvl in a shared thread pool. + """Run online TDX verification via dcap_qvl synchronously. Accepts either base64-encoded quote (str) or raw bytes. """ @@ -444,16 +444,12 @@ def _verify_tdx_online(quote: str | bytes) -> dict[str, Any]: quote_bytes = _decode_quote(quote) if isinstance(quote, str) else quote - def run_verification() -> Any: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(dcap_qvl.get_collateral_and_verify(quote_bytes)) - finally: - loop.close() - - future = TDX_EXECUTOR.submit(run_verification) - verified_report = future.result(timeout=30) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + verified_report = loop.run_until_complete(dcap_qvl.get_collateral_and_verify(quote_bytes)) + finally: + loop.close() result = json.loads(verified_report.to_json()) return {"result": result, "error": None} @@ -461,7 +457,17 @@ def run_verification() -> Any: return {"result": None, "error": str(exc)} -def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]: +async def _verify_tdx_online_async(quote: str | bytes) -> dict[str, Any]: + """Async wrapper that runs TDX verification in the shared executor. + + This keeps the event loop responsive while using a bounded thread pool + for the heavy verification work. + """ + loop = asyncio.get_running_loop() + return await loop.run_in_executor(TDX_EXECUTOR, _verify_tdx_online, quote) + + +async def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]: errors: list[str] = [] detail: dict[str, Any] = { "instance_id": attestation.get("instance_id"), @@ -488,7 +494,7 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - detail["errors"] = ["invalid_quote_base64"] return detail - tdx_verification = _verify_tdx_online(quote_bytes) + tdx_verification = await _verify_tdx_online_async(quote_bytes) tdx_error = tdx_verification.get("error") if tdx_error: detail["tdx_error_present"] = True @@ -552,19 +558,20 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) - return detail -def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]: +async def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]: details: list[dict[str, Any]] = [] attestations = attestation_bundle.get("all_attestations") or [] if not attestations: return False, [{"instance_id": None, "errors": ["missing_all_attestations"]}] - all_ok = True - for att in attestations: - att_detail = _verify_single_chutes_attestation(att, nonce) - if att_detail.get("errors"): - all_ok = False - details.append(att_detail) + # Verify all instances concurrently, bounded by TDX_EXECUTOR size. + tasks = [ + _verify_single_chutes_attestation(att, nonce) + for att in attestations + ] + details = await asyncio.gather(*tasks) + all_ok = all(not d.get("errors") for d in details) return all_ok, details @@ -677,11 +684,15 @@ async def attestation_chain( "binding_proof": binding_proof, } - verified, verification_details = _verify_chutes_attestation_bundle(upstream_attestation, nonce) + verified, verification_details = await _verify_chutes_attestation_bundle(upstream_attestation, nonce) if not verified: + log.error( + "Chutes attestation verification failed in proxy mode: %s", + json.dumps(verification_details, separators=(",", ":")), + ) return error( status_code=502, - message=f"Chutes attestation verification failed in proxy mode: {json.dumps(verification_details, separators=(',', ':'))}", + message="Chutes attestation verification failed in proxy mode", type="chutes_verification_failed", )