From 865bf3087033b27ae374edba34ca498b3232f1ce Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Tue, 31 Mar 2026 21:41:21 +0800
Subject: [PATCH 01/22] feat: add dedicated chutes sidecar endpoints

---
 src/app/api/v1/openai.py | 167 ++++++++++++++++++++++++++++++++-------
 1 file changed, 137 insertions(+), 30 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 5a48f1e..19c2994 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -48,6 +48,13 @@
 VLLM_COMPLETIONS_URL = f"{VLLM_BASE_URL}/v1/completions"
 VLLM_METRICS_URL = f"{VLLM_BASE_URL}/metrics"
 VLLM_MODELS_URL = f"{VLLM_BASE_URL}/v1/models"
+
+CHUTES_ENABLED = os.getenv("CHUTES_ENABLED", "false").lower() in ("1", "true", "yes", "on")
+CHUTES_BASE_URL = os.getenv("CHUTES_BASE_URL", "https://llm.chutes.ai").rstrip("/")
+CHUTES_CHAT_COMPLETIONS_URL = f"{CHUTES_BASE_URL}/v1/chat/completions"
+CHUTES_MODELS_URL = f"{CHUTES_BASE_URL}/v1/models"
+CHUTES_API_KEY = os.getenv("CHUTES_API_KEY")
+
 TIMEOUT = 60 * 10
 
 COMMON_HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
@@ -72,15 +79,29 @@ def sign_chat(text: str):
     )
 
 
+def _with_outbound_headers(outbound_headers: Optional[dict[str, str]] = None) -> dict[str, str]:
+    headers = dict(COMMON_HEADERS)
+    if outbound_headers:
+        headers.update(outbound_headers)
+    return headers
+
+
+def _chutes_auth_headers() -> dict[str, str]:
+    if CHUTES_API_KEY:
+        return {"Authorization": f"Bearer {CHUTES_API_KEY}"}
+    return {}
+
+
 async def stream_vllm_response(
     url: str,
     request_body: bytes,
     modified_request_body: bytes,
     request_hash: Optional[str] = None,
     e2ee_ctx=None,
+    outbound_headers: Optional[dict[str, str]] = None,
 ):
     """
-    Handle streaming vllm request
+    Handle streaming backend request.
     Args:
         request_body: The original request body
         modified_request_body: The modified enhanced request body
@@ -137,8 +158,10 @@ async def generate_stream(response):
             log.error(error_message)
             raise Exception(error_message)
 
-    client = httpx.AsyncClient(timeout=httpx.Timeout(TIMEOUT), headers=COMMON_HEADERS)
-    # Forward the request to the vllm backend
+    client = httpx.AsyncClient(
+        timeout=httpx.Timeout(TIMEOUT),
+        headers=_with_outbound_headers(outbound_headers),
+    )
     req = client.build_request("POST", url, content=modified_request_body)
     response = await client.send(req, stream=True)
     # If not 200, return the error response directly without streaming
@@ -171,6 +194,7 @@ async def non_stream_vllm_response(
     modified_request_body: bytes,
     request_hash: Optional[str] = None,
     e2ee_ctx=None,
+    outbound_headers: Optional[dict[str, str]] = None,
 ):
     """
     Handle non-streaming responses
@@ -191,7 +215,8 @@ async def non_stream_vllm_response(
         log.debug(f"Calculated request hash: {request_sha256}")
 
     async with httpx.AsyncClient(
-        timeout=httpx.Timeout(TIMEOUT), headers=COMMON_HEADERS
+        timeout=httpx.Timeout(TIMEOUT),
+        headers=_with_outbound_headers(outbound_headers),
     ) as client:
         response = await client.post(url, content=modified_request_body)
         if response.status_code != 200:
@@ -263,17 +288,17 @@ async def attestation_report(
     return resp
 
 
-# VLLM Chat completions
-@router.post("/chat/completions", dependencies=[Depends(verify_authorization_header)])
-async def chat_completions(
+async def _chat_completions_impl(
     request: Request,
-    x_request_hash: Optional[str] = Header(None, alias="X-Request-Hash"),
-    x_signing_algo: Optional[str] = Header(None, alias="X-Signing-Algo"),
-    x_client_pub_key: Optional[str] = Header(None, alias="X-Client-Pub-Key"),
-    x_model_pub_key: Optional[str] = Header(None, alias="X-Model-Pub-Key"),
-    x_e2ee_version: Optional[str] = Header(None, alias="X-E2EE-Version"),
-    x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"),
-    x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"),
+    x_request_hash: Optional[str],
+    x_signing_algo: Optional[str],
+    x_client_pub_key: Optional[str],
+    x_model_pub_key: Optional[str],
+    x_e2ee_version: Optional[str],
+    x_e2ee_nonce: Optional[str],
+    x_e2ee_timestamp: Optional[str],
+    backend_url: str,
+    outbound_headers: Optional[dict[str, str]] = None,
 ):
     # Keep original request body to calculate the request hash for attestation
     request_body = await request.body()
@@ -299,25 +324,89 @@ async def chat_completions(
     modified_json = strip_empty_tool_calls(request_json)
 
     # Check if the request is for streaming or non-streaming
-    is_stream = modified_json.get(
-        "stream", False
-    )  # Default to non-streaming if not specified
+    is_stream = modified_json.get("stream", False)
     modified_request_body = json.dumps(modified_json).encode("utf-8")
+
     if is_stream:
-        # Create a streaming response
         return await stream_vllm_response(
-            VLLM_URL, request_body, modified_request_body, x_request_hash, e2ee_ctx
-        )
-    else:
-        # Handle non-streaming response
-        response_data = await non_stream_vllm_response(
-            VLLM_URL, request_body, modified_request_body, x_request_hash, e2ee_ctx
-        )
-        return JSONResponse(
-            content=response_data,
-            headers=get_e2ee_response_headers(e2ee_ctx),
+            backend_url,
+            request_body,
+            modified_request_body,
+            x_request_hash,
+            e2ee_ctx,
+            outbound_headers=outbound_headers,
         )
 
+    response_data = await non_stream_vllm_response(
+        backend_url,
+        request_body,
+        modified_request_body,
+        x_request_hash,
+        e2ee_ctx,
+        outbound_headers=outbound_headers,
+    )
+    return JSONResponse(
+        content=response_data,
+        headers=get_e2ee_response_headers(e2ee_ctx),
+    )
+
+
+# VLLM Chat completions (existing path, unchanged behavior)
+@router.post("/chat/completions", dependencies=[Depends(verify_authorization_header)])
+async def chat_completions(
+    request: Request,
+    x_request_hash: Optional[str] = Header(None, alias="X-Request-Hash"),
+    x_signing_algo: Optional[str] = Header(None, alias="X-Signing-Algo"),
+    x_client_pub_key: Optional[str] = Header(None, alias="X-Client-Pub-Key"),
+    x_model_pub_key: Optional[str] = Header(None, alias="X-Model-Pub-Key"),
+    x_e2ee_version: Optional[str] = Header(None, alias="X-E2EE-Version"),
+    x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"),
+    x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"),
+):
+    return await _chat_completions_impl(
+        request=request,
+        x_request_hash=x_request_hash,
+        x_signing_algo=x_signing_algo,
+        x_client_pub_key=x_client_pub_key,
+        x_model_pub_key=x_model_pub_key,
+        x_e2ee_version=x_e2ee_version,
+        x_e2ee_nonce=x_e2ee_nonce,
+        x_e2ee_timestamp=x_e2ee_timestamp,
+        backend_url=VLLM_URL,
+    )
+
+
+# Chutes chat completions (new path, side-by-side with existing logic)
+@router.post("/chutes/chat/completions", dependencies=[Depends(verify_authorization_header)])
+async def chutes_chat_completions(
+    request: Request,
+    x_request_hash: Optional[str] = Header(None, alias="X-Request-Hash"),
+    x_signing_algo: Optional[str] = Header(None, alias="X-Signing-Algo"),
+    x_client_pub_key: Optional[str] = Header(None, alias="X-Client-Pub-Key"),
+    x_model_pub_key: Optional[str] = Header(None, alias="X-Model-Pub-Key"),
+    x_e2ee_version: Optional[str] = Header(None, alias="X-E2EE-Version"),
+    x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"),
+    x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"),
+):
+    if not CHUTES_ENABLED:
+        return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled")
+
+    if not CHUTES_API_KEY:
+        return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured")
+
+    return await _chat_completions_impl(
+        request=request,
+        x_request_hash=x_request_hash,
+        x_signing_algo=x_signing_algo,
+        x_client_pub_key=x_client_pub_key,
+        x_model_pub_key=x_model_pub_key,
+        x_e2ee_version=x_e2ee_version,
+        x_e2ee_nonce=x_e2ee_nonce,
+        x_e2ee_timestamp=x_e2ee_timestamp,
+        backend_url=CHUTES_CHAT_COMPLETIONS_URL,
+        outbound_headers=_chutes_auth_headers(),
+    )
+
 
 # VLLM completions
 @router.post("/completions", dependencies=[Depends(verify_authorization_header)])
@@ -410,7 +499,7 @@ async def signature(request: Request, chat_id: str, signing_algo: str = None):
 async def metrics(request: Request):
     # Get local metrics from the proxy
     local_metrics = get_proxy_metrics()
-    
+
     # Fetch metrics from the vLLM backend
     try:
         async with httpx.AsyncClient(timeout=httpx.Timeout(TIMEOUT)) as client:
@@ -423,7 +512,7 @@ async def metrics(request: Request):
     except Exception as e:
         log.error(f"Error fetching vLLM metrics: {e}")
         remote_metrics = f"# Error fetching vLLM metrics: {e}"
-        
+
     # Combine both and return
     combined_metrics = f"{local_metrics}\n\n# --- vLLM Backend Metrics ---\n\n{remote_metrics}"
     return PlainTextResponse(combined_metrics)
@@ -436,3 +525,21 @@ async def models(request: Request):
         if response.status_code != 200:
             raise HTTPException(status_code=response.status_code, detail=response.text)
         return JSONResponse(content=response.json())
+
+
+@router.get("/chutes/models", dependencies=[Depends(verify_authorization_header)])
+async def chutes_models(request: Request):
+    if not CHUTES_ENABLED:
+        return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled")
+
+    if not CHUTES_API_KEY:
+        return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured")
+
+    async with httpx.AsyncClient(
+        timeout=httpx.Timeout(TIMEOUT),
+        headers=_with_outbound_headers(_chutes_auth_headers()),
+    ) as client:
+        response = await client.get(CHUTES_MODELS_URL)
+        if response.status_code != 200:
+            raise HTTPException(status_code=response.status_code, detail=response.text)
+        return JSONResponse(content=response.json())

From 36f952049ecd9154f2c48328b4177e655e267f8a Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 14:06:55 +0800
Subject: [PATCH 02/22] feat: support request model cache namespace for chutes
 mode

---
 src/app/api/v1/openai.py | 30 ++++++++++++++++-----
 src/app/cache/cache.py   | 58 +++++++++++++++++++++++++++++++++-------
 2 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 19c2994..463ee13 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -99,6 +99,7 @@ async def stream_vllm_response(
     request_hash: Optional[str] = None,
     e2ee_ctx=None,
     outbound_headers: Optional[dict[str, str]] = None,
+    model_name: Optional[str] = None,
 ):
     """
     Handle streaming backend request.
@@ -151,7 +152,9 @@ async def generate_stream(response):
         # Cache the full request and response using the extracted cache key
         if chat_id:
             cache.set_chat(
-                chat_id, json.dumps(sign_chat(f"{request_sha256}:{response_sha256}"))
+                chat_id,
+                json.dumps(sign_chat(f"{request_sha256}:{response_sha256}")),
+                model_name=model_name,
             )
         else:
             error_message = "Chat id could not be extracted from the response"
@@ -195,6 +198,7 @@ async def non_stream_vllm_response(
     request_hash: Optional[str] = None,
     e2ee_ctx=None,
     outbound_headers: Optional[dict[str, str]] = None,
+    model_name: Optional[str] = None,
 ):
     """
     Handle non-streaming responses
@@ -230,7 +234,9 @@ async def non_stream_vllm_response(
         if chat_id:
             response_sha256 = sha256(json.dumps(response_data).encode("utf-8")).hexdigest()
             cache.set_chat(
-                chat_id, json.dumps(sign_chat(f"{request_sha256}:{response_sha256}"))
+                chat_id,
+                json.dumps(sign_chat(f"{request_sha256}:{response_sha256}")),
+                model_name=model_name,
             )
         else:
             raise Exception("Chat id could not be extracted from the response")
@@ -325,6 +331,7 @@ async def _chat_completions_impl(
 
     # Check if the request is for streaming or non-streaming
     is_stream = modified_json.get("stream", False)
+    request_model = modified_json.get("model")
     modified_request_body = json.dumps(modified_json).encode("utf-8")
 
     if is_stream:
@@ -335,6 +342,7 @@ async def _chat_completions_impl(
             x_request_hash,
             e2ee_ctx,
             outbound_headers=outbound_headers,
+            model_name=request_model,
         )
 
     response_data = await non_stream_vllm_response(
@@ -344,6 +352,7 @@ async def _chat_completions_impl(
         x_request_hash,
         e2ee_ctx,
         outbound_headers=outbound_headers,
+        model_name=request_model,
     )
     return JSONResponse(
         content=response_data,
@@ -445,24 +454,33 @@ async def completions(
     is_stream = modified_json.get(
         "stream", False
     )  # Default to non-streaming if not specified
+    request_model = modified_json.get("model")
     modified_request_body = json.dumps(modified_json).encode("utf-8")
     if is_stream:
         # Create a streaming response
         return await stream_vllm_response(
-            VLLM_COMPLETIONS_URL, request_body, modified_request_body, x_request_hash
+            VLLM_COMPLETIONS_URL,
+            request_body,
+            modified_request_body,
+            x_request_hash,
+            model_name=request_model,
         )
     else:
         # Handle non-streaming response
         response_data = await non_stream_vllm_response(
-            VLLM_COMPLETIONS_URL, request_body, modified_request_body, x_request_hash
+            VLLM_COMPLETIONS_URL,
+            request_body,
+            modified_request_body,
+            x_request_hash,
+            model_name=request_model,
         )
         return JSONResponse(content=response_data)
 
 
 # Get signature for chat_id of chat history
 @router.get("/signature/{chat_id}", dependencies=[Depends(verify_authorization_header)])
-async def signature(request: Request, chat_id: str, signing_algo: str = None):
-    cache_value = cache.get_chat(chat_id)
+async def signature(request: Request, chat_id: str, signing_algo: str = None, model: Optional[str] = None):
+    cache_value = cache.get_chat(chat_id, model_name=model)
     if cache_value is None:
         return not_found("Chat id not found or expired")
 
diff --git a/src/app/cache/cache.py b/src/app/cache/cache.py
index d629c95..5e0725d 100644
--- a/src/app/cache/cache.py
+++ b/src/app/cache/cache.py
@@ -1,4 +1,3 @@
-import json
 import os
 from typing import Optional
 
@@ -9,7 +8,11 @@
 
 CHAT_CACHE_EXPIRATION = int(os.getenv("CHAT_CACHE_EXPIRATION", "1200"))
 MODEL_NAME = os.getenv("MODEL_NAME")
-if not MODEL_NAME:
+CHUTES_ENABLED = os.getenv("CHUTES_ENABLED", "false").lower() in ("1", "true", "yes", "on")
+# Stable namespace used for backward-compatible lookup when request model is dynamic.
+CHUTES_FALLBACK_MODEL_NAME = os.getenv("CHUTES_FALLBACK_MODEL_NAME", "chutes")
+
+if not CHUTES_ENABLED and not MODEL_NAME:
     raise ValueError("MODEL_NAME is not set")
 
 CHAT_PREFIX = "chat"
@@ -35,9 +38,23 @@ def _init_redis(self) -> Optional[RedisCache]:
             return None
         return RedisCache(expiration=CHAT_CACHE_EXPIRATION)
 
-    def _make_key(self, prefix: str, key: str) -> str:
+    def _resolve_model_name(self, request_model: Optional[str] = None) -> str:
+        """Resolve model namespace for cache key."""
+        if request_model:
+            return request_model
+
+        if MODEL_NAME:
+            return MODEL_NAME
+
+        if CHUTES_ENABLED:
+            return CHUTES_FALLBACK_MODEL_NAME
+
+        raise ValueError("MODEL_NAME is not set")
+
+    def _make_key(self, prefix: str, key: str, model_name: Optional[str] = None) -> str:
         """Build namespaced cache key: model:prefix:key"""
-        return f"{MODEL_NAME}:{prefix}:{key}"
+        resolved_model_name = self._resolve_model_name(model_name)
+        return f"{resolved_model_name}:{prefix}:{key}"
 
     def _write_string(self, key: str, value: str) -> None:
         """Write string to local and optionally to Redis."""
@@ -63,16 +80,37 @@ def _read_string(self, key: str) -> Optional[str]:
 
     # Chat operations
 
-    def set_chat(self, chat_id: str, chat: str) -> None:
+    def set_chat(self, chat_id: str, chat: str, model_name: Optional[str] = None) -> None:
         """Store chat completion data."""
-        key = self._make_key(CHAT_PREFIX, chat_id)
+        key = self._make_key(CHAT_PREFIX, chat_id, model_name=model_name)
         self._write_string(key, chat)
 
-    def get_chat(self, chat_id: str) -> Optional[str]:
+        # Backward-compatible fallback key for dynamic-model proxy mode.
+        if CHUTES_ENABLED and model_name:
+            fallback_key = self._make_key(
+                CHAT_PREFIX,
+                chat_id,
+                model_name=CHUTES_FALLBACK_MODEL_NAME,
+            )
+            if fallback_key != key:
+                self._write_string(fallback_key, chat)
+
+    def get_chat(self, chat_id: str, model_name: Optional[str] = None) -> Optional[str]:
         """Retrieve chat completion data."""
-        key = self._make_key(CHAT_PREFIX, chat_id)
-        return self._read_string(key)
-
+        key = self._make_key(CHAT_PREFIX, chat_id, model_name=model_name)
+        value = self._read_string(key)
+        if value:
+            return value
+
+        if CHUTES_ENABLED and model_name and model_name != CHUTES_FALLBACK_MODEL_NAME:
+            fallback_key = self._make_key(
+                CHAT_PREFIX,
+                chat_id,
+                model_name=CHUTES_FALLBACK_MODEL_NAME,
+            )
+            return self._read_string(fallback_key)
+
+        return None
 
 
 cache = ChatCache()

From 7ab07f96f335b5848d44f9a8418c0125213d1e18 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 15:52:18 +0800
Subject: [PATCH 03/22] feat: route /v1/chat/completions to chutes when enabled

---
 src/app/api/v1/openai.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 463ee13..aca1e30 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -360,7 +360,9 @@ async def _chat_completions_impl(
     )
 
 
-# VLLM Chat completions (existing path, unchanged behavior)
+# Chat completions (compat route):
+# - CHUTES_ENABLED=false -> original vLLM backend behavior
+# - CHUTES_ENABLED=true  -> transparently route to Chutes backend
 @router.post("/chat/completions", dependencies=[Depends(verify_authorization_header)])
 async def chat_completions(
     request: Request,
@@ -372,6 +374,15 @@ async def chat_completions(
     x_e2ee_nonce: Optional[str] = Header(None, alias="X-E2EE-Nonce"),
     x_e2ee_timestamp: Optional[str] = Header(None, alias="X-E2EE-Timestamp"),
 ):
+    backend_url = VLLM_URL
+    outbound_headers = None
+
+    if CHUTES_ENABLED:
+        if not CHUTES_API_KEY:
+            return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured")
+        backend_url = CHUTES_CHAT_COMPLETIONS_URL
+        outbound_headers = _chutes_auth_headers()
+
     return await _chat_completions_impl(
         request=request,
         x_request_hash=x_request_hash,
@@ -381,7 +392,8 @@ async def chat_completions(
         x_e2ee_version=x_e2ee_version,
         x_e2ee_nonce=x_e2ee_nonce,
         x_e2ee_timestamp=x_e2ee_timestamp,
-        backend_url=VLLM_URL,
+        backend_url=backend_url,
+        outbound_headers=outbound_headers,
     )
 
 

From 45d8ff8bb11f6ba835a6fec46f3be1fd6d1c0aaf Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 16:48:59 +0800
Subject: [PATCH 04/22] feat(attestation): add /v1/attestation/chain endpoint
 with binding proof

---
 src/app/api/v1/openai.py      | 111 +++++++++++++++++++++++++++++++---
 tests/app/test_openai_e2ee.py |  40 ++++++++++++
 2 files changed, 142 insertions(+), 9 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index aca1e30..c9b976d 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -1,5 +1,7 @@
 import json
 import os
+import time
+import hashlib
 from hashlib import sha256
 from typing import Optional
 
@@ -268,6 +270,24 @@ def strip_empty_tool_calls(payload: dict) -> dict:
     return payload
 
 
+def _normalize_signing_algo(signing_algo: str | None) -> str:
+    algo = ECDSA if signing_algo is None else signing_algo.strip().lower()
+    if algo not in [ECDSA, ED25519]:
+        raise ValueError("invalid_signing_algo")
+    return algo
+
+
+def _build_proxy_attestation(signing_algo: str, nonce: str | None) -> dict:
+    context = ecdsa_context if signing_algo == ECDSA else ed25519_context
+    attestation = dict(generate_attestation(context, nonce))
+    attestation["signing_public_key"] = local_model_public_key_hex(signing_algo)
+
+    resp = dict(attestation)
+    resp["signing_public_key"] = attestation["signing_public_key"]
+    resp["all_attestations"] = [attestation]
+    return resp
+
+
 # Get attestation report of intel quote and nvidia payload
 @router.get("/attestation/report", dependencies=[Depends(verify_authorization_header)])
 async def attestation_report(
@@ -276,22 +296,95 @@ async def attestation_report(
     nonce: str | None = Query(None),
     signing_address: str | None = Query(None),
 ):
-    signing_algo = ECDSA if signing_algo is None else signing_algo
-    if signing_algo not in [ECDSA, ED25519]:
+    try:
+        algo = _normalize_signing_algo(signing_algo)
+    except ValueError:
         return invalid_signing_algo()
 
-    context = ecdsa_context if signing_algo == ECDSA else ed25519_context
+    try:
+        return _build_proxy_attestation(algo, nonce)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+@router.get("/attestation/chain", dependencies=[Depends(verify_authorization_header)])
+async def attestation_chain(
+    request: Request,
+    model: str = Query(...),
+    nonce: str = Query(...),
+    signing_algo: str | None = None,
+):
+    if not CHUTES_ENABLED:
+        return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled")
+
+    if not CHUTES_API_KEY:
+        return error(status_code=503, message="CHUTES_API_KEY is not configured", type="chutes_misconfigured")
+
+    try:
+        algo = _normalize_signing_algo(signing_algo)
+    except ValueError:
+        return invalid_signing_algo()
+
+    if len(nonce) < 16:
+        return error(
+            status_code=400,
+            message="nonce must be at least 16 characters",
+            type="invalid_nonce",
+        )
 
     try:
-        attestation = dict(generate_attestation(context, nonce))
+        proxy_attestation = _build_proxy_attestation(algo, nonce)
     except ValueError as exc:
         raise HTTPException(status_code=400, detail=str(exc))
 
-    attestation["signing_public_key"] = local_model_public_key_hex(signing_algo)
-    resp = dict(attestation)
-    resp["signing_public_key"] = attestation["signing_public_key"]
-    resp["all_attestations"] = [attestation]
-    return resp
+    upstream_params = {"model": model, "nonce": nonce, "signing_algo": algo}
+    async with httpx.AsyncClient(
+        timeout=httpx.Timeout(TIMEOUT),
+        headers=_with_outbound_headers(_chutes_auth_headers()),
+    ) as client:
+        upstream_response = await client.get(
+            f"{CHUTES_BASE_URL}/v1/attestation/report",
+            params=upstream_params,
+        )
+
+    if upstream_response.status_code != 200:
+        raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text)
+
+    upstream_attestation = upstream_response.json()
+    upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":"))
+    upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest()
+
+    binding_payload = {
+        "nonce": nonce,
+        "timestamp": int(time.time()),
+        "provider": "chutes",
+        "upstream_base_url": CHUTES_BASE_URL,
+        "model": model,
+        "upstream_attestation_sha256": upstream_attestation_sha256,
+    }
+    binding_text = json.dumps(binding_payload, sort_keys=True, separators=(",", ":"))
+    context = ecdsa_context if algo == ECDSA else ed25519_context
+
+    return {
+        "version": "1",
+        "proxy": {
+            "attestation": proxy_attestation,
+            "signing_public_key": proxy_attestation.get("signing_public_key"),
+        },
+        "upstream": {
+            "provider": "chutes",
+            "base_url": CHUTES_BASE_URL,
+            "model": model,
+            "attestation": upstream_attestation,
+            "attestation_sha256": upstream_attestation_sha256,
+        },
+        "binding_proof": {
+            "payload": binding_payload,
+            "signature": sign_message(context, binding_text),
+            "signing_algo": algo,
+            "signing_address": context.signing_address,
+        },
+    }
 
 
 async def _chat_completions_impl(
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 2b85760..9842d83 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -332,3 +332,43 @@ def test_attestation_report_includes_signing_public_key():
     assert "signing_public_key" in data
     assert len(data["signing_public_key"]) == 64
     assert data["all_attestations"][0]["signing_public_key"] == data["signing_public_key"]
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_attestation_chain_success(respx_mock):
+    upstream_url = "https://llm.chutes.ai/v1/attestation/report"
+    upstream_att = {"report": "upstream-ok", "nonce": "n" * 16}
+    respx_mock.get(upstream_url).mock(return_value=httpx.Response(200, json=upstream_att))
+
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "a" * 16, "signing_algo": "ecdsa"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["version"] == "1"
+    assert data["proxy"]["attestation"]["request_nonce"] == "a" * 16
+    assert data["upstream"]["attestation"] == upstream_att
+
+    expected_hash = __import__("hashlib").sha256(
+        json.dumps(upstream_att, sort_keys=True, separators=(",", ":")).encode("utf-8")
+    ).hexdigest()
+    assert data["upstream"]["attestation_sha256"] == expected_hash
+    assert data["binding_proof"]["payload"]["upstream_attestation_sha256"] == expected_hash
+    assert data["binding_proof"]["payload"]["model"] == "moonshotai/Kimi-K2.5-TEE"
+
+
+def test_attestation_chain_nonce_too_short():
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "short", "signing_algo": "ecdsa"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 400
+    assert response.json()["error"]["type"] == "invalid_nonce"

From d85efa0856bd3ac2f2c71fa2b2b18bef0e48f7e0 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 16:50:23 +0800
Subject: [PATCH 05/22] fix(attestation-chain): validate input and handle
 upstream fetch/JSON failures

---
 src/app/api/v1/openai.py      | 28 +++++++++++++++++++---------
 tests/app/test_openai_e2ee.py | 15 ++++++++++++++-
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index c9b976d..77fa58e 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -325,12 +325,16 @@ async def attestation_chain(
     except ValueError:
         return invalid_signing_algo()
 
+    nonce = nonce.strip()
+    model = model.strip()
     if len(nonce) < 16:
         return error(
             status_code=400,
             message="nonce must be at least 16 characters",
             type="invalid_nonce",
         )
+    if not model:
+        return error(status_code=400, message="model must not be empty", type="invalid_model")
 
     try:
         proxy_attestation = _build_proxy_attestation(algo, nonce)
@@ -338,19 +342,25 @@ async def attestation_chain(
         raise HTTPException(status_code=400, detail=str(exc))
 
     upstream_params = {"model": model, "nonce": nonce, "signing_algo": algo}
-    async with httpx.AsyncClient(
-        timeout=httpx.Timeout(TIMEOUT),
-        headers=_with_outbound_headers(_chutes_auth_headers()),
-    ) as client:
-        upstream_response = await client.get(
-            f"{CHUTES_BASE_URL}/v1/attestation/report",
-            params=upstream_params,
-        )
+    try:
+        async with httpx.AsyncClient(
+            timeout=httpx.Timeout(TIMEOUT),
+            headers=_with_outbound_headers(_chutes_auth_headers()),
+        ) as client:
+            upstream_response = await client.get(
+                f"{CHUTES_BASE_URL}/v1/attestation/report",
+                params=upstream_params,
+            )
+    except httpx.RequestError as exc:
+        return error(status_code=502, message=f"Failed to fetch upstream attestation: {exc}", type="upstream_unreachable")
 
     if upstream_response.status_code != 200:
         raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text)
 
-    upstream_attestation = upstream_response.json()
+    try:
+        upstream_attestation = upstream_response.json()
+    except ValueError:
+        return error(status_code=502, message="Upstream attestation response is not valid JSON", type="upstream_invalid_response")
     upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":"))
     upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest()
 
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 9842d83..099da75 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -1,3 +1,4 @@
+import hashlib
 import httpx
 import pytest
 from fastapi.testclient import TestClient
@@ -354,7 +355,7 @@ async def test_attestation_chain_success(respx_mock):
     assert data["proxy"]["attestation"]["request_nonce"] == "a" * 16
     assert data["upstream"]["attestation"] == upstream_att
 
-    expected_hash = __import__("hashlib").sha256(
+    expected_hash = hashlib.sha256(
         json.dumps(upstream_att, sort_keys=True, separators=(",", ":")).encode("utf-8")
     ).hexdigest()
     assert data["upstream"]["attestation_sha256"] == expected_hash
@@ -372,3 +373,15 @@ def test_attestation_chain_nonce_too_short():
 
     assert response.status_code == 400
     assert response.json()["error"]["type"] == "invalid_nonce"
+
+
+def test_attestation_chain_model_empty():
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": "   ", "nonce": "a" * 16, "signing_algo": "ecdsa"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 400
+    assert response.json()["error"]["type"] == "invalid_model"

From fabf438b3b2b5b49dcd02663a5db7f99c1606001 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 17:07:18 +0800
Subject: [PATCH 06/22] test(attestation): add standalone verification scripts
 for report and chain endpoints

---
 tests/app/verify_attestation_chain.py         | 74 +++++++++++++++++++
 .../app/verify_attestation_chain_negative.py  | 37 ++++++++++
 tests/app/verify_attestation_report.py        | 47 ++++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 tests/app/verify_attestation_chain.py
 create mode 100644 tests/app/verify_attestation_chain_negative.py
 create mode 100644 tests/app/verify_attestation_report.py

diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
new file mode 100644
index 0000000..22995ad
--- /dev/null
+++ b/tests/app/verify_attestation_chain.py
@@ -0,0 +1,74 @@
+import os
+import json
+import time
+import hashlib
+import requests
+
+BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
+API_KEY = os.environ.get("API_KEY", "")
+MODEL_NAME = os.environ.get("MODEL_NAME", "")
+SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower()
+
+
+def _canonical_json(obj) -> str:
+    return json.dumps(obj, sort_keys=True, separators=(",", ":"))
+
+
+def main():
+    if not BASE_URL or not API_KEY or not MODEL_NAME:
+        raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME")
+
+    nonce = f"chain-{int(time.time())}-proof"
+
+    url = f"{BASE_URL}/v1/attestation/chain"
+    resp = requests.get(
+        url,
+        params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO},
+        headers={"Authorization": f"Bearer {API_KEY}"},
+        timeout=90,
+    )
+
+    print("status:", resp.status_code)
+    resp.raise_for_status()
+
+    data = resp.json()
+
+    # Top-level structure checks
+    assert data.get("version") == "1", "unexpected chain version"
+    assert "proxy" in data and "upstream" in data and "binding_proof" in data, "missing chain sections"
+
+    # Proxy section
+    proxy = data["proxy"]
+    proxy_att = proxy["attestation"]
+    assert proxy.get("signing_public_key"), "missing proxy.signing_public_key"
+    assert proxy_att.get("signing_public_key") == proxy["signing_public_key"], "proxy signing_public_key mismatch"
+
+    # Upstream hash consistency
+    upstream = data["upstream"]
+    upstream_att = upstream["attestation"]
+    upstream_hash = hashlib.sha256(_canonical_json(upstream_att).encode("utf-8")).hexdigest()
+    assert upstream.get("attestation_sha256") == upstream_hash, "upstream attestation hash mismatch"
+
+    # Binding payload consistency
+    bp = data["binding_proof"]
+    payload = bp["payload"]
+    assert payload.get("nonce") == nonce, "binding payload nonce mismatch"
+    assert payload.get("model") == MODEL_NAME, "binding payload model mismatch"
+    assert payload.get("upstream_attestation_sha256") == upstream_hash, "binding payload hash mismatch"
+    assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch"
+    assert bp.get("signature"), "binding signature missing"
+
+    # Optional sanity: if upstream carries nonce, it should match
+    upstream_nonce = upstream_att.get("request_nonce") or upstream_att.get("nonce")
+    if upstream_nonce is not None:
+        assert upstream_nonce == nonce, "upstream nonce does not match request nonce"
+
+    print("[OK] /v1/attestation/chain validated")
+    print("nonce:", nonce)
+    print("model:", MODEL_NAME)
+    print("signing_algo:", SIGNING_ALGO)
+    print("upstream_attestation_sha256:", upstream_hash)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/app/verify_attestation_chain_negative.py b/tests/app/verify_attestation_chain_negative.py
new file mode 100644
index 0000000..62adc6d
--- /dev/null
+++ b/tests/app/verify_attestation_chain_negative.py
@@ -0,0 +1,37 @@
+import os
+import requests
+
+BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
+API_KEY = os.environ.get("API_KEY", "")
+MODEL_NAME = os.environ.get("MODEL_NAME", "")
+SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower()
+
+
+def main():
+    if not BASE_URL or not API_KEY or not MODEL_NAME:
+        raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME")
+
+    # nonce deliberately too short, should fail with invalid_nonce
+    resp = requests.get(
+        f"{BASE_URL}/v1/attestation/chain",
+        params={"model": MODEL_NAME, "nonce": "short", "signing_algo": SIGNING_ALGO},
+        headers={"Authorization": f"Bearer {API_KEY}"},
+        timeout=60,
+    )
+
+    print("status:", resp.status_code)
+    print("body:", resp.text)
+
+    if resp.status_code != 400:
+        raise RuntimeError(f"Expected 400 for short nonce, got {resp.status_code}")
+
+    data = resp.json()
+    err_type = (data.get("error") or {}).get("type")
+    if err_type != "invalid_nonce":
+        raise RuntimeError(f"Expected error.type=invalid_nonce, got {err_type}")
+
+    print("[OK] negative check passed (invalid_nonce)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/app/verify_attestation_report.py b/tests/app/verify_attestation_report.py
new file mode 100644
index 0000000..6c7a3b3
--- /dev/null
+++ b/tests/app/verify_attestation_report.py
@@ -0,0 +1,47 @@
+import os
+import requests
+
+BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
+API_KEY = os.environ.get("API_KEY", "")
+MODEL_NAME = os.environ.get("MODEL_NAME", "")  # kept for consistent runner env
+SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower()
+
+
+def main():
+    if not BASE_URL or not API_KEY:
+        raise RuntimeError("Please set BASE_URL and API_KEY")
+
+    url = f"{BASE_URL}/v1/attestation/report"
+    resp = requests.get(
+        url,
+        params={"signing_algo": SIGNING_ALGO},
+        headers={"Authorization": f"Bearer {API_KEY}"},
+        timeout=60,
+    )
+
+    print("status:", resp.status_code)
+    resp.raise_for_status()
+    data = resp.json()
+
+    assert "signing_public_key" in data, "missing signing_public_key"
+    assert "all_attestations" in data and isinstance(data["all_attestations"], list), "missing all_attestations"
+    assert data["all_attestations"], "all_attestations is empty"
+    assert (
+        data["all_attestations"][0].get("signing_public_key") == data["signing_public_key"]
+    ), "top-level signing_public_key mismatch"
+
+    expected_len = 128 if SIGNING_ALGO == "ecdsa" else 64
+    assert len(data["signing_public_key"]) == expected_len, (
+        f"unexpected signing_public_key length: {len(data['signing_public_key'])}, "
+        f"expected {expected_len} for {SIGNING_ALGO}"
+    )
+
+    print("[OK] /v1/attestation/report validated")
+    print("signing_algo:", SIGNING_ALGO)
+    print("signing_public_key len:", len(data["signing_public_key"]))
+    if MODEL_NAME:
+        print("model_name (unused in this check):", MODEL_NAME)
+
+
+if __name__ == "__main__":
+    main()

From ddd5408ccfda3bd731dd2ae767442f950fb35322 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 17:11:56 +0800
Subject: [PATCH 07/22] fix(attestation-chain): use api.chutes.ai for upstream
 attestation endpoint

---
 src/app/api/v1/openai.py      | 7 ++++---
 tests/app/test_openai_e2ee.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 77fa58e..931a9b2 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -53,6 +53,7 @@
 
 CHUTES_ENABLED = os.getenv("CHUTES_ENABLED", "false").lower() in ("1", "true", "yes", "on")
 CHUTES_BASE_URL = os.getenv("CHUTES_BASE_URL", "https://llm.chutes.ai").rstrip("/")
+CHUTES_ATTESTATION_BASE_URL = os.getenv("CHUTES_ATTESTATION_BASE_URL", "https://api.chutes.ai").rstrip("/")
 CHUTES_CHAT_COMPLETIONS_URL = f"{CHUTES_BASE_URL}/v1/chat/completions"
 CHUTES_MODELS_URL = f"{CHUTES_BASE_URL}/v1/models"
 CHUTES_API_KEY = os.getenv("CHUTES_API_KEY")
@@ -348,7 +349,7 @@ async def attestation_chain(
             headers=_with_outbound_headers(_chutes_auth_headers()),
         ) as client:
             upstream_response = await client.get(
-                f"{CHUTES_BASE_URL}/v1/attestation/report",
+                f"{CHUTES_ATTESTATION_BASE_URL}/v1/attestation/report",
                 params=upstream_params,
             )
     except httpx.RequestError as exc:
@@ -368,7 +369,7 @@ async def attestation_chain(
         "nonce": nonce,
         "timestamp": int(time.time()),
         "provider": "chutes",
-        "upstream_base_url": CHUTES_BASE_URL,
+        "upstream_base_url": CHUTES_ATTESTATION_BASE_URL,
         "model": model,
         "upstream_attestation_sha256": upstream_attestation_sha256,
     }
@@ -383,7 +384,7 @@ async def attestation_chain(
         },
         "upstream": {
             "provider": "chutes",
-            "base_url": CHUTES_BASE_URL,
+            "base_url": CHUTES_ATTESTATION_BASE_URL,
             "model": model,
             "attestation": upstream_attestation,
             "attestation_sha256": upstream_attestation_sha256,
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 099da75..3b816bc 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -338,7 +338,7 @@ def test_attestation_report_includes_signing_public_key():
 @pytest.mark.asyncio
 @pytest.mark.respx
 async def test_attestation_chain_success(respx_mock):
-    upstream_url = "https://llm.chutes.ai/v1/attestation/report"
+    upstream_url = "https://api.chutes.ai/v1/attestation/report"
     upstream_att = {"report": "upstream-ok", "nonce": "n" * 16}
     respx_mock.get(upstream_url).mock(return_value=httpx.Response(200, json=upstream_att))
 

From 5bdab6f653d06f90d458f303692bf98e6494778b Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 17:30:33 +0800
Subject: [PATCH 08/22] fix(test): use hex nonce in attestation chain
 verification script

---
 tests/app/verify_attestation_chain.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index 22995ad..4b9a4d8 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -1,7 +1,7 @@
 import os
 import json
-import time
 import hashlib
+import secrets
 import requests
 
 BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
@@ -18,7 +18,8 @@ def main():
     if not BASE_URL or not API_KEY or not MODEL_NAME:
         raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME")
 
-    nonce = f"chain-{int(time.time())}-proof"
+    # Use 32-byte hex nonce to match attestation implementations that expect hex challenge.
+    nonce = secrets.token_hex(32)
 
     url = f"{BASE_URL}/v1/attestation/chain"
     resp = requests.get(

From 7b5b4cba685d5d3f1abbc69ce8a87f2d14322e0d Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 17:34:24 +0800
Subject: [PATCH 09/22] fix(attestation-chain): surface upstream rate-limit
 errors and improve verifier output

---
 src/app/api/v1/openai.py              | 7 +++++++
 tests/app/verify_attestation_chain.py | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 931a9b2..8ebf151 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -355,6 +355,13 @@ async def attestation_chain(
     except httpx.RequestError as exc:
         return error(status_code=502, message=f"Failed to fetch upstream attestation: {exc}", type="upstream_unreachable")
 
+    if upstream_response.status_code == 429:
+        retry_after = upstream_response.headers.get("Retry-After")
+        msg = "Upstream attestation is rate limited"
+        if retry_after:
+            msg = f"{msg}; retry after {retry_after} seconds"
+        return error(status_code=429, message=msg, type="upstream_rate_limited")
+
     if upstream_response.status_code != 200:
         raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text)
 
diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index 4b9a4d8..69fd55c 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -30,6 +30,14 @@ def main():
     )
 
     print("status:", resp.status_code)
+    if resp.status_code == 429:
+        print("body:", resp.text)
+        raise RuntimeError(
+            "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). "
+            "Wait and retry, or reduce verification call frequency."
+        )
+    if resp.status_code >= 400:
+        print("body:", resp.text)
     resp.raise_for_status()
 
     data = resp.json()

From d7dbef82b7a8e02f1fcff956a0d0f75b7168347f Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 17:42:32 +0800
Subject: [PATCH 10/22] fix(attestation-chain): follow chutes evidence flow
 (model->chute->instances->evidence)

---
 src/app/api/v1/openai.py      | 117 ++++++++++++++++++++++++++++------
 tests/app/test_openai_e2ee.py |  42 +++++++++---
 2 files changed, 132 insertions(+), 27 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 8ebf151..bae25c1 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -57,6 +57,8 @@
 CHUTES_CHAT_COMPLETIONS_URL = f"{CHUTES_BASE_URL}/v1/chat/completions"
 CHUTES_MODELS_URL = f"{CHUTES_BASE_URL}/v1/models"
 CHUTES_API_KEY = os.getenv("CHUTES_API_KEY")
+CHUTES_CHUTE_ID_CACHE: dict[str, tuple[str, float]] = {}
+CHUTES_CHUTE_ID_CACHE_TTL_SECONDS = int(os.getenv("CHUTES_CHUTE_ID_CACHE_TTL_SECONDS", "3600"))
 
 TIMEOUT = 60 * 10
 
@@ -289,6 +291,98 @@ def _build_proxy_attestation(signing_algo: str, nonce: str | None) -> dict:
     return resp
 
 
+def _error_from_upstream_429(response: httpx.Response):
+    retry_after = response.headers.get("Retry-After")
+    msg = "Upstream attestation is rate limited"
+    if retry_after:
+        msg = f"{msg}; retry after {retry_after} seconds"
+    return error(status_code=429, message=msg, type="upstream_rate_limited")
+
+
+async def _resolve_chute_id(client: httpx.AsyncClient, model: str) -> str | dict:
+    now = time.time()
+    cached = CHUTES_CHUTE_ID_CACHE.get(model)
+    if cached and now - cached[1] < CHUTES_CHUTE_ID_CACHE_TTL_SECONDS:
+        return cached[0]
+
+    resp = await client.get(
+        f"{CHUTES_ATTESTATION_BASE_URL}/chutes/",
+        params={"include_public": "true", "name": model},
+    )
+    if resp.status_code == 429:
+        return _error_from_upstream_429(resp)
+    if resp.status_code != 200:
+        raise HTTPException(status_code=resp.status_code, detail=resp.text)
+
+    data = resp.json()
+    items = data.get("items") or []
+    if not items:
+        return error(status_code=404, message=f"No chute found for model: {model}", type="upstream_model_not_found")
+
+    chute_id = items[0].get("chute_id")
+    if not chute_id:
+        return error(status_code=502, message="Upstream chute lookup missing chute_id", type="upstream_invalid_response")
+
+    CHUTES_CHUTE_ID_CACHE[model] = (chute_id, now)
+    return chute_id
+
+
+async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce: str) -> tuple[dict | None, dict | None]:
+    chute_id_or_error = await _resolve_chute_id(client, model)
+    if isinstance(chute_id_or_error, dict) and chute_id_or_error.get("error"):
+        return None, chute_id_or_error
+    chute_id = chute_id_or_error
+
+    e2e_resp = await client.get(f"{CHUTES_ATTESTATION_BASE_URL}/e2e/instances/{chute_id}")
+    if e2e_resp.status_code == 429:
+        return None, _error_from_upstream_429(e2e_resp)
+    if e2e_resp.status_code != 200:
+        raise HTTPException(status_code=e2e_resp.status_code, detail=e2e_resp.text)
+
+    e2e_data = e2e_resp.json()
+    instances = e2e_data.get("instances") or []
+    pubkeys = {i.get("instance_id"): i.get("e2e_pubkey") for i in instances if i.get("instance_id")}
+
+    evidence_resp = await client.get(
+        f"{CHUTES_ATTESTATION_BASE_URL}/chutes/{chute_id}/evidence",
+        params={"nonce": nonce},
+    )
+    if evidence_resp.status_code == 429:
+        return None, _error_from_upstream_429(evidence_resp)
+    if evidence_resp.status_code != 200:
+        raise HTTPException(status_code=evidence_resp.status_code, detail=evidence_resp.text)
+
+    evidence_data = evidence_resp.json()
+    evidence_list = evidence_data.get("evidence") or []
+
+    all_attestations = []
+    for e in evidence_list:
+        iid = e.get("instance_id")
+        e2e_pubkey = pubkeys.get(iid)
+        if not iid or not e2e_pubkey:
+            continue
+        all_attestations.append(
+            {
+                "instance_id": iid,
+                "nonce": nonce,
+                "e2e_pubkey": e2e_pubkey,
+                "intel_quote": e.get("quote"),
+                "gpu_evidence": e.get("gpu_evidence", []),
+                "certificate": e.get("certificate"),
+            }
+        )
+
+    if not all_attestations:
+        return None, error(status_code=502, message="No usable upstream attestations returned", type="upstream_invalid_response")
+
+    return {
+        "attestation_type": "chutes",
+        "nonce": nonce,
+        "chute_id": chute_id,
+        "all_attestations": all_attestations,
+    }, None
+
+
 # Get attestation report of intel quote and nvidia payload
 @router.get("/attestation/report", dependencies=[Depends(verify_authorization_header)])
 async def attestation_report(
@@ -342,33 +436,18 @@ async def attestation_chain(
     except ValueError as exc:
         raise HTTPException(status_code=400, detail=str(exc))
 
-    upstream_params = {"model": model, "nonce": nonce, "signing_algo": algo}
     try:
         async with httpx.AsyncClient(
             timeout=httpx.Timeout(TIMEOUT),
-            headers=_with_outbound_headers(_chutes_auth_headers()),
+            headers={"Authorization": f"Bearer {CHUTES_API_KEY}"},
         ) as client:
-            upstream_response = await client.get(
-                f"{CHUTES_ATTESTATION_BASE_URL}/v1/attestation/report",
-                params=upstream_params,
-            )
+            upstream_attestation, upstream_error = await _fetch_chutes_attestation(client, model, nonce)
     except httpx.RequestError as exc:
         return error(status_code=502, message=f"Failed to fetch upstream attestation: {exc}", type="upstream_unreachable")
 
-    if upstream_response.status_code == 429:
-        retry_after = upstream_response.headers.get("Retry-After")
-        msg = "Upstream attestation is rate limited"
-        if retry_after:
-            msg = f"{msg}; retry after {retry_after} seconds"
-        return error(status_code=429, message=msg, type="upstream_rate_limited")
-
-    if upstream_response.status_code != 200:
-        raise HTTPException(status_code=upstream_response.status_code, detail=upstream_response.text)
+    if upstream_error is not None:
+        return upstream_error
 
-    try:
-        upstream_attestation = upstream_response.json()
-    except ValueError:
-        return error(status_code=502, message="Upstream attestation response is not valid JSON", type="upstream_invalid_response")
     upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":"))
     upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest()
 
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 3b816bc..46ae695 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -338,29 +338,55 @@ def test_attestation_report_includes_signing_public_key():
 @pytest.mark.asyncio
 @pytest.mark.respx
 async def test_attestation_chain_success(respx_mock):
-    upstream_url = "https://api.chutes.ai/v1/attestation/report"
-    upstream_att = {"report": "upstream-ok", "nonce": "n" * 16}
-    respx_mock.get(upstream_url).mock(return_value=httpx.Response(200, json=upstream_att))
+    model = "moonshotai/Kimi-K2.5-TEE"
+    nonce = "a" * 16
+
+    respx_mock.get("https://api.chutes.ai/chutes/").mock(
+        return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-123"}]})
+    )
+    respx_mock.get("https://api.chutes.ai/e2e/instances/chute-123").mock(
+        return_value=httpx.Response(
+            200,
+            json={"instances": [{"instance_id": "inst-1", "e2e_pubkey": "pk-1"}]},
+        )
+    )
+    respx_mock.get("https://api.chutes.ai/chutes/chute-123/evidence").mock(
+        return_value=httpx.Response(
+            200,
+            json={
+                "evidence": [
+                    {
+                        "instance_id": "inst-1",
+                        "quote": "intel-quote",
+                        "gpu_evidence": [{"gpu": "ok"}],
+                        "certificate": "cert",
+                    }
+                ]
+            },
+        )
+    )
 
     with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
         response = client.get(
             "/v1/attestation/chain",
-            params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "a" * 16, "signing_algo": "ecdsa"},
+            params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
             headers={"Authorization": TEST_AUTH_HEADER},
         )
 
     assert response.status_code == 200
     data = response.json()
     assert data["version"] == "1"
-    assert data["proxy"]["attestation"]["request_nonce"] == "a" * 16
-    assert data["upstream"]["attestation"] == upstream_att
+    assert data["proxy"]["attestation"]["request_nonce"] == nonce
+    assert data["upstream"]["attestation"]["attestation_type"] == "chutes"
+    assert data["upstream"]["attestation"]["chute_id"] == "chute-123"
+    assert data["upstream"]["attestation"]["all_attestations"][0]["instance_id"] == "inst-1"
 
     expected_hash = hashlib.sha256(
-        json.dumps(upstream_att, sort_keys=True, separators=(",", ":")).encode("utf-8")
+        json.dumps(data["upstream"]["attestation"], sort_keys=True, separators=(",", ":")).encode("utf-8")
     ).hexdigest()
     assert data["upstream"]["attestation_sha256"] == expected_hash
     assert data["binding_proof"]["payload"]["upstream_attestation_sha256"] == expected_hash
-    assert data["binding_proof"]["payload"]["model"] == "moonshotai/Kimi-K2.5-TEE"
+    assert data["binding_proof"]["payload"]["model"] == model
 
 
 def test_attestation_chain_nonce_too_short():

From f46dc076b2a0ef4144cad54aa82dd7ffc94c6917 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Wed, 1 Apr 2026 17:50:15 +0800
Subject: [PATCH 11/22] fix(test): add retry and configurable timeout for
 attestation chain verification

---
 tests/app/verify_attestation_chain.py | 58 +++++++++++++++++++--------
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index 69fd55c..c0e904d 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -3,11 +3,15 @@
 import hashlib
 import secrets
 import requests
+from requests.exceptions import ReadTimeout
 
 BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
 API_KEY = os.environ.get("API_KEY", "")
 MODEL_NAME = os.environ.get("MODEL_NAME", "")
 SIGNING_ALGO = os.environ.get("SIGNING_ALGO", "ecdsa").lower()
+CONNECT_TIMEOUT = int(os.environ.get("CONNECT_TIMEOUT", "15"))
+READ_TIMEOUT = int(os.environ.get("READ_TIMEOUT", "300"))
+MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3"))
 
 
 def _canonical_json(obj) -> str:
@@ -22,23 +26,43 @@ def main():
     nonce = secrets.token_hex(32)
 
     url = f"{BASE_URL}/v1/attestation/chain"
-    resp = requests.get(
-        url,
-        params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO},
-        headers={"Authorization": f"Bearer {API_KEY}"},
-        timeout=90,
-    )
-
-    print("status:", resp.status_code)
-    if resp.status_code == 429:
-        print("body:", resp.text)
-        raise RuntimeError(
-            "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). "
-            "Wait and retry, or reduce verification call frequency."
-        )
-    if resp.status_code >= 400:
-        print("body:", resp.text)
-    resp.raise_for_status()
+
+    resp = None
+    for attempt in range(1, MAX_RETRIES + 1):
+        try:
+            resp = requests.get(
+                url,
+                params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO},
+                headers={"Authorization": f"Bearer {API_KEY}"},
+                timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
+            )
+        except ReadTimeout:
+            if attempt == MAX_RETRIES:
+                raise RuntimeError(
+                    f"Read timeout after {MAX_RETRIES} attempts (connect={CONNECT_TIMEOUT}s, read={READ_TIMEOUT}s)."
+                )
+            print(f"attempt {attempt}/{MAX_RETRIES} timed out, retrying...")
+            continue
+
+        print("status:", resp.status_code)
+        if resp.status_code == 429:
+            retry_after = resp.headers.get("Retry-After")
+            print("body:", resp.text)
+            if attempt == MAX_RETRIES:
+                raise RuntimeError(
+                    "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). "
+                    f"Retry-After={retry_after}."
+                )
+            print(f"attempt {attempt}/{MAX_RETRIES} got 429, retrying...")
+            continue
+
+        if resp.status_code >= 400:
+            print("body:", resp.text)
+        resp.raise_for_status()
+        break
+
+    if resp is None:
+        raise RuntimeError("No response received")
 
     data = resp.json()
 

From 84021eb2213cf91e329d05739e9b6b33f5046f16 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 09:55:20 +0800
Subject: [PATCH 12/22] test(attestation): verify binding_proof signature
 against proxy signing identity

---
 tests/app/verify_attestation_chain.py | 43 +++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index c0e904d..d90be17 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -4,6 +4,9 @@
 import secrets
 import requests
 from requests.exceptions import ReadTimeout
+from eth_account import Account
+from eth_account.messages import encode_defunct
+from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey
 
 BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
 API_KEY = os.environ.get("API_KEY", "")
@@ -18,6 +21,37 @@ def _canonical_json(obj) -> str:
     return json.dumps(obj, sort_keys=True, separators=(",", ":"))
 
 
+def _verify_binding_signature(binding_proof: dict):
+    payload_text = _canonical_json(binding_proof["payload"])
+    signing_algo = binding_proof.get("signing_algo")
+    signature = binding_proof.get("signature")
+    signing_address = binding_proof.get("signing_address")
+
+    if signing_algo == "ecdsa":
+        if not signature or not signature.startswith("0x"):
+            raise RuntimeError("invalid ecdsa signature format")
+        recovered = Account.recover_message(
+            encode_defunct(text=payload_text),
+            signature=signature,
+        )
+        if recovered.lower() != (signing_address or "").lower():
+            raise RuntimeError(
+                f"binding signature mismatch: recovered={recovered}, expected={signing_address}"
+            )
+        return recovered
+
+    if signing_algo == "ed25519":
+        # In this project ed25519 signing_address is the raw public key hex.
+        pubkey_hex = signing_address or ""
+        if len(pubkey_hex) != 64:
+            raise RuntimeError("invalid ed25519 signing_address/public key")
+        pubkey = Ed25519PublicKey.from_public_bytes(bytes.fromhex(pubkey_hex))
+        pubkey.verify(bytes.fromhex(signature), payload_text.encode("utf-8"))
+        return pubkey_hex
+
+    raise RuntimeError(f"unsupported signing_algo: {signing_algo}")
+
+
 def main():
     if not BASE_URL or not API_KEY or not MODEL_NAME:
         raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME")
@@ -91,6 +125,14 @@ def main():
     assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch"
     assert bp.get("signature"), "binding signature missing"
 
+    # Signature verification: proves the binding payload was signed by proxy signing identity.
+    recovered_signer = _verify_binding_signature(bp)
+    proxy_signing_address = proxy_att.get("signing_address")
+    if proxy_signing_address:
+        assert recovered_signer.lower() == proxy_signing_address.lower(), (
+            f"proxy signing address mismatch: recovered={recovered_signer}, proxy={proxy_signing_address}"
+        )
+
     # Optional sanity: if upstream carries nonce, it should match
     upstream_nonce = upstream_att.get("request_nonce") or upstream_att.get("nonce")
     if upstream_nonce is not None:
@@ -100,6 +142,7 @@ def main():
     print("nonce:", nonce)
     print("model:", MODEL_NAME)
     print("signing_algo:", SIGNING_ALGO)
+    print("binding_signer:", recovered_signer)
     print("upstream_attestation_sha256:", upstream_hash)
 
 

From 48d564d115c6931ad13d37e993a95ac76b62cea6 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 14:57:25 +0800
Subject: [PATCH 13/22] feat: add proxy-mode chutes verification with
 passthrough option

---
 src/app/api/v1/openai.py              | 182 ++++++++++++++++++++++++--
 tests/app/test_openai_e2ee.py         | 124 ++++++++++++++++--
 tests/app/verify_attestation_chain.py |  71 +++++-----
 3 files changed, 323 insertions(+), 54 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index bae25c1..814589e 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -1,9 +1,9 @@
+import base64
 import json
 import os
 import time
-import hashlib
 from hashlib import sha256
-from typing import Optional
+from typing import Any, Optional
 
 import httpx
 from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Header, Query
@@ -368,6 +368,8 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce
                 "e2e_pubkey": e2e_pubkey,
                 "intel_quote": e.get("quote"),
                 "gpu_evidence": e.get("gpu_evidence", []),
+                "gpu_tokens": e.get("gpu_tokens"),
+                "tdx_verification": e.get("tdx_verification"),
                 "certificate": e.get("certificate"),
             }
         )
@@ -383,6 +385,109 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce
     }, None
 
 
+def _decode_quote(quote_b64: str) -> bytes:
+    return base64.b64decode(quote_b64)
+
+
+def _extract_td_attributes(quote_bytes: bytes) -> int:
+    body = quote_bytes[48 : 48 + 584]
+    td_attributes_hex = body[120:128].hex()
+    return int(td_attributes_hex, 16)
+
+
+def _extract_report_data_sha256(quote_bytes: bytes) -> str:
+    td_report_bytes = quote_bytes[48:632]
+    report_data_hex = td_report_bytes[520:584].hex().lower()
+    return report_data_hex[:64]
+
+
+def _decode_jwt_payload_without_verification(token: str) -> dict[str, Any]:
+    parts = token.split(".")
+    if len(parts) < 2:
+        raise ValueError("invalid_jwt_format")
+    payload = parts[1]
+    payload += "=" * (-len(payload) % 4)
+    decoded = base64.urlsafe_b64decode(payload.encode("utf-8")).decode("utf-8")
+    return json.loads(decoded)
+
+
+def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any:
+    if "gpu_tokens" in attestation:
+        return attestation.get("gpu_tokens")
+    return attestation.get("gpu_evidence")
+
+
+def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> list[str]:
+    errors: list[str] = []
+
+    quote_b64 = attestation.get("intel_quote") or attestation.get("quote")
+    e2e_pubkey = attestation.get("e2e_pubkey")
+    if not quote_b64:
+        return ["missing_intel_quote"]
+    if not e2e_pubkey:
+        return ["missing_e2e_pubkey"]
+
+    try:
+        quote_bytes = _decode_quote(quote_b64)
+    except Exception:
+        return ["invalid_quote_base64"]
+
+    tdx_result = (attestation.get("tdx_verification") or {}).get("result") or {}
+    tdx_status = tdx_result.get("status")
+    if tdx_status != "UpToDate":
+        errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}")
+
+    try:
+        td_attributes = _extract_td_attributes(quote_bytes)
+        if td_attributes & 1:
+            errors.append("tdx_debug_mode_enabled")
+    except Exception:
+        errors.append("tdx_attributes_parse_failed")
+
+    expected_report_data = sha256((nonce + e2e_pubkey).encode("utf-8")).hexdigest().lower()
+    actual_report_data = _extract_report_data_sha256(quote_bytes)
+    if actual_report_data != expected_report_data:
+        errors.append("report_data_binding_mismatch")
+
+    gpu_tokens = _extract_gpu_tokens(attestation)
+    if isinstance(gpu_tokens, dict):
+        if gpu_tokens.get("error"):
+            errors.append("gpu_tokens_error")
+        tokens = gpu_tokens.get("tokens")
+        if tokens:
+            try:
+                platform_entry = tokens[0]
+                if not isinstance(platform_entry, list) or len(platform_entry) < 2:
+                    errors.append("gpu_platform_token_format_invalid")
+                else:
+                    platform_claims = _decode_jwt_payload_without_verification(platform_entry[1])
+                    if platform_claims.get("x-nvidia-overall-att-result") is not True:
+                        errors.append("gpu_overall_attestation_failed")
+                    if platform_claims.get("eat_nonce") != expected_report_data:
+                        errors.append("gpu_eat_nonce_mismatch")
+            except Exception:
+                errors.append("gpu_tokens_parse_failed")
+
+    return errors
+
+
+def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]:
+    details: list[dict[str, Any]] = []
+    attestations = attestation_bundle.get("all_attestations") or []
+    if not attestations:
+        return False, [{"instance_id": None, "errors": ["missing_all_attestations"]}]
+
+    all_ok = True
+    for att in attestations:
+        instance_id = att.get("instance_id")
+        att_errors = _verify_single_chutes_attestation(att, nonce)
+        if att_errors:
+            all_ok = False
+        details.append({"instance_id": instance_id, "errors": att_errors})
+
+    return all_ok, details
+
+
 # Get attestation report of intel quote and nvidia payload
 @router.get("/attestation/report", dependencies=[Depends(verify_authorization_header)])
 async def attestation_report(
@@ -408,6 +513,7 @@ async def attestation_chain(
     model: str = Query(...),
     nonce: str = Query(...),
     signing_algo: str | None = None,
+    verify_mode: str = Query("proxy"),
 ):
     if not CHUTES_ENABLED:
         return error(status_code=503, message="Chutes route is disabled", type="chutes_disabled")
@@ -422,6 +528,10 @@ async def attestation_chain(
 
     nonce = nonce.strip()
     model = model.strip()
+    mode = verify_mode.strip().lower()
+    if mode not in {"proxy", "passthrough"}:
+        return error(status_code=400, message="verify_mode must be one of: proxy, passthrough", type="invalid_verify_mode")
+
     if len(nonce) < 16:
         return error(
             status_code=400,
@@ -449,7 +559,9 @@ async def attestation_chain(
         return upstream_error
 
     upstream_raw = json.dumps(upstream_attestation, sort_keys=True, separators=(",", ":"))
-    upstream_attestation_sha256 = hashlib.sha256(upstream_raw.encode("utf-8")).hexdigest()
+    upstream_attestation_sha256 = sha256(upstream_raw.encode("utf-8")).hexdigest()
+
+    context = ecdsa_context if algo == ECDSA else ed25519_context
 
     binding_payload = {
         "nonce": nonce,
@@ -460,24 +572,66 @@ async def attestation_chain(
         "upstream_attestation_sha256": upstream_attestation_sha256,
     }
     binding_text = json.dumps(binding_payload, sort_keys=True, separators=(",", ":"))
-    context = ecdsa_context if algo == ECDSA else ed25519_context
+    binding_proof = {
+        "payload": binding_payload,
+        "signature": sign_message(context, binding_text),
+        "signing_algo": algo,
+        "signing_address": context.signing_address,
+    }
+
+    if mode == "passthrough":
+        return {
+            "version": "1",
+            "verify_mode": mode,
+            "proxy": {
+                "attestation": proxy_attestation,
+                "signing_public_key": proxy_attestation.get("signing_public_key"),
+            },
+            "upstream": {
+                "provider": "chutes",
+                "base_url": CHUTES_ATTESTATION_BASE_URL,
+                "model": model,
+                "attestation": upstream_attestation,
+                "attestation_sha256": upstream_attestation_sha256,
+            },
+            "binding_proof": binding_proof,
+        }
+
+    verified, verification_details = _verify_chutes_attestation_bundle(upstream_attestation, nonce)
+    if not verified:
+        return error(
+            status_code=502,
+            message=f"Chutes attestation verification failed in proxy mode: {json.dumps(verification_details, separators=(',', ':'))}",
+            type="chutes_verification_failed",
+        )
+
+    receipt_payload = {
+        "nonce": nonce,
+        "request_hash": sha256(f"{model}:{nonce}".encode("utf-8")).hexdigest(),
+        "provider": "chutes",
+        "model": model,
+        "verify_mode": mode,
+        "verification_policy": "chutes-v1",
+        "verification_policy_version": "1",
+        "upstream_attestation_sha256": upstream_attestation_sha256,
+        "binding_signature": binding_proof["signature"],
+        "binding_signing_algo": binding_proof["signing_algo"],
+        "binding_signing_address": binding_proof["signing_address"],
+        "verified_at": int(time.time()),
+        "result": "pass",
+    }
+    receipt_text = json.dumps(receipt_payload, sort_keys=True, separators=(",", ":"))
 
     return {
         "version": "1",
+        "verify_mode": mode,
         "proxy": {
             "attestation": proxy_attestation,
             "signing_public_key": proxy_attestation.get("signing_public_key"),
         },
-        "upstream": {
-            "provider": "chutes",
-            "base_url": CHUTES_ATTESTATION_BASE_URL,
-            "model": model,
-            "attestation": upstream_attestation,
-            "attestation_sha256": upstream_attestation_sha256,
-        },
-        "binding_proof": {
-            "payload": binding_payload,
-            "signature": sign_message(context, binding_text),
+        "verification_receipt": {
+            "payload": receipt_payload,
+            "signature": sign_message(context, receipt_text),
             "signing_algo": algo,
             "signing_address": context.signing_address,
         },
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 46ae695..1ee3607 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -1,4 +1,7 @@
+import base64
 import hashlib
+import json
+
 import httpx
 import pytest
 from fastapi.testclient import TestClient
@@ -335,11 +338,27 @@ def test_attestation_report_includes_signing_public_key():
     assert data["all_attestations"][0]["signing_public_key"] == data["signing_public_key"]
 
 
+def _make_chutes_quote_b64(nonce: str, e2e_pubkey: str, *, debug_enabled: bool = False) -> str:
+    quote_bytes = bytearray(700)
+
+    td_attributes_offset = 48 + 120
+    quote_bytes[td_attributes_offset : td_attributes_offset + 8] = (1 if debug_enabled else 0).to_bytes(8, "little")
+
+    report_data_offset = 48 + 520
+    report_data_hex = hashlib.sha256((nonce + e2e_pubkey).encode("utf-8")).hexdigest()
+    report_data_bytes = bytes.fromhex(report_data_hex) + bytes(32)
+    quote_bytes[report_data_offset : report_data_offset + 64] = report_data_bytes
+
+    return base64.b64encode(bytes(quote_bytes)).decode("utf-8")
+
+
 @pytest.mark.asyncio
 @pytest.mark.respx
-async def test_attestation_chain_success(respx_mock):
+async def test_attestation_chain_success_proxy_mode(respx_mock):
     model = "moonshotai/Kimi-K2.5-TEE"
     nonce = "a" * 16
+    e2e_pubkey = "pk-1"
+    quote_b64 = _make_chutes_quote_b64(nonce, e2e_pubkey)
 
     respx_mock.get("https://api.chutes.ai/chutes/").mock(
         return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-123"}]})
@@ -347,7 +366,7 @@ async def test_attestation_chain_success(respx_mock):
     respx_mock.get("https://api.chutes.ai/e2e/instances/chute-123").mock(
         return_value=httpx.Response(
             200,
-            json={"instances": [{"instance_id": "inst-1", "e2e_pubkey": "pk-1"}]},
+            json={"instances": [{"instance_id": "inst-1", "e2e_pubkey": e2e_pubkey}]},
         )
     )
     respx_mock.get("https://api.chutes.ai/chutes/chute-123/evidence").mock(
@@ -357,8 +376,8 @@ async def test_attestation_chain_success(respx_mock):
                 "evidence": [
                     {
                         "instance_id": "inst-1",
-                        "quote": "intel-quote",
-                        "gpu_evidence": [{"gpu": "ok"}],
+                        "quote": quote_b64,
+                        "tdx_verification": {"result": {"status": "UpToDate"}},
                         "certificate": "cert",
                     }
                 ]
@@ -376,17 +395,94 @@ async def test_attestation_chain_success(respx_mock):
     assert response.status_code == 200
     data = response.json()
     assert data["version"] == "1"
+    assert data["verify_mode"] == "proxy"
     assert data["proxy"]["attestation"]["request_nonce"] == nonce
-    assert data["upstream"]["attestation"]["attestation_type"] == "chutes"
-    assert data["upstream"]["attestation"]["chute_id"] == "chute-123"
-    assert data["upstream"]["attestation"]["all_attestations"][0]["instance_id"] == "inst-1"
+    assert "verification_receipt" in data
+    assert data["verification_receipt"]["payload"]["result"] == "pass"
+    assert data["verification_receipt"]["payload"]["model"] == model
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_attestation_chain_passthrough_mode_returns_upstream_bundle(respx_mock):
+    model = "moonshotai/Kimi-K2.5-TEE"
+    nonce = "b" * 16
+    e2e_pubkey = "pk-2"
+    quote_b64 = _make_chutes_quote_b64(nonce, e2e_pubkey)
+
+    respx_mock.get("https://api.chutes.ai/chutes/").mock(
+        return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-321"}]})
+    )
+    respx_mock.get("https://api.chutes.ai/e2e/instances/chute-321").mock(
+        return_value=httpx.Response(
+            200,
+            json={"instances": [{"instance_id": "inst-2", "e2e_pubkey": e2e_pubkey}]},
+        )
+    )
+    respx_mock.get("https://api.chutes.ai/chutes/chute-321/evidence").mock(
+        return_value=httpx.Response(
+            200,
+            json={
+                "evidence": [
+                    {
+                        "instance_id": "inst-2",
+                        "quote": quote_b64,
+                        "tdx_verification": {"result": {"status": "UpToDate"}},
+                        "certificate": "cert",
+                    }
+                ]
+            },
+        )
+    )
+
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": model, "nonce": nonce, "signing_algo": "ecdsa", "verify_mode": "passthrough"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
 
+    assert response.status_code == 200
+    data = response.json()
+    assert data["verify_mode"] == "passthrough"
     expected_hash = hashlib.sha256(
         json.dumps(data["upstream"]["attestation"], sort_keys=True, separators=(",", ":")).encode("utf-8")
     ).hexdigest()
     assert data["upstream"]["attestation_sha256"] == expected_hash
     assert data["binding_proof"]["payload"]["upstream_attestation_sha256"] == expected_hash
-    assert data["binding_proof"]["payload"]["model"] == model
+
+
+def test_attestation_chain_proxy_mode_verification_failure_returns_502():
+    model = "moonshotai/Kimi-K2.5-TEE"
+    nonce = "a" * 16
+
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch(
+        "app.api.v1.openai._fetch_chutes_attestation",
+        return_value=(
+            {
+                "attestation_type": "chutes",
+                "nonce": nonce,
+                "chute_id": "chute-123",
+                "all_attestations": [
+                    {
+                        "instance_id": "inst-1",
+                        "e2e_pubkey": "pk-1",
+                        "intel_quote": "bad-quote",
+                        "tdx_verification": {"result": {"status": "OutOfDate"}},
+                    }
+                ],
+            },
+            None,
+        ),
+    ):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 502
+    assert response.json()["error"]["type"] == "chutes_verification_failed"
 
 
 def test_attestation_chain_nonce_too_short():
@@ -401,6 +497,18 @@ def test_attestation_chain_nonce_too_short():
     assert response.json()["error"]["type"] == "invalid_nonce"
 
 
+def test_attestation_chain_invalid_verify_mode():
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": "moonshotai/Kimi-K2.5-TEE", "nonce": "a" * 16, "signing_algo": "ecdsa", "verify_mode": "bad"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 400
+    assert response.json()["error"]["type"] == "invalid_verify_mode"
+
+
 def test_attestation_chain_model_empty():
     with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
         response = client.get(
diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index d90be17..ad03f5f 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -15,6 +15,7 @@
 CONNECT_TIMEOUT = int(os.environ.get("CONNECT_TIMEOUT", "15"))
 READ_TIMEOUT = int(os.environ.get("READ_TIMEOUT", "300"))
 MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3"))
+VERIFY_MODE = os.environ.get("VERIFY_MODE", "proxy").lower()
 
 
 def _canonical_json(obj) -> str:
@@ -66,7 +67,7 @@ def main():
         try:
             resp = requests.get(
                 url,
-                params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO},
+                params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO, "verify_mode": VERIFY_MODE},
                 headers={"Authorization": f"Bearer {API_KEY}"},
                 timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
             )
@@ -102,48 +103,54 @@ def main():
 
     # Top-level structure checks
     assert data.get("version") == "1", "unexpected chain version"
-    assert "proxy" in data and "upstream" in data and "binding_proof" in data, "missing chain sections"
+    assert data.get("verify_mode") == VERIFY_MODE, f"unexpected verify_mode: {data.get('verify_mode')}"
+    assert "proxy" in data, "missing proxy section"
 
-    # Proxy section
     proxy = data["proxy"]
     proxy_att = proxy["attestation"]
     assert proxy.get("signing_public_key"), "missing proxy.signing_public_key"
     assert proxy_att.get("signing_public_key") == proxy["signing_public_key"], "proxy signing_public_key mismatch"
 
-    # Upstream hash consistency
-    upstream = data["upstream"]
-    upstream_att = upstream["attestation"]
-    upstream_hash = hashlib.sha256(_canonical_json(upstream_att).encode("utf-8")).hexdigest()
-    assert upstream.get("attestation_sha256") == upstream_hash, "upstream attestation hash mismatch"
-
-    # Binding payload consistency
-    bp = data["binding_proof"]
-    payload = bp["payload"]
-    assert payload.get("nonce") == nonce, "binding payload nonce mismatch"
-    assert payload.get("model") == MODEL_NAME, "binding payload model mismatch"
-    assert payload.get("upstream_attestation_sha256") == upstream_hash, "binding payload hash mismatch"
-    assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch"
-    assert bp.get("signature"), "binding signature missing"
-
-    # Signature verification: proves the binding payload was signed by proxy signing identity.
-    recovered_signer = _verify_binding_signature(bp)
-    proxy_signing_address = proxy_att.get("signing_address")
-    if proxy_signing_address:
-        assert recovered_signer.lower() == proxy_signing_address.lower(), (
-            f"proxy signing address mismatch: recovered={recovered_signer}, proxy={proxy_signing_address}"
-        )
+    if VERIFY_MODE == "passthrough":
+        assert "upstream" in data and "binding_proof" in data, "missing passthrough sections"
+        upstream = data["upstream"]
+        upstream_att = upstream["attestation"]
+        upstream_hash = hashlib.sha256(_canonical_json(upstream_att).encode("utf-8")).hexdigest()
+        assert upstream.get("attestation_sha256") == upstream_hash, "upstream attestation hash mismatch"
+
+        bp = data["binding_proof"]
+        payload = bp["payload"]
+        assert payload.get("nonce") == nonce, "binding payload nonce mismatch"
+        assert payload.get("model") == MODEL_NAME, "binding payload model mismatch"
+        assert payload.get("upstream_attestation_sha256") == upstream_hash, "binding payload hash mismatch"
+        assert bp.get("signing_algo") == SIGNING_ALGO, "binding signing_algo mismatch"
+        assert bp.get("signature"), "binding signature missing"
+
+        recovered_signer = _verify_binding_signature(bp)
+        proxy_signing_address = proxy_att.get("signing_address")
+        if proxy_signing_address:
+            assert recovered_signer.lower() == proxy_signing_address.lower(), (
+                f"proxy signing address mismatch: recovered={recovered_signer}, proxy={proxy_signing_address}"
+            )
+
+        print("[OK] /v1/attestation/chain passthrough validated")
+        print("binding_signer:", recovered_signer)
+        print("upstream_attestation_sha256:", upstream_hash)
+    else:
+        assert "verification_receipt" in data, "missing verification_receipt"
+        receipt = data["verification_receipt"]
+        assert receipt.get("signature"), "receipt signature missing"
+        payload = receipt.get("payload") or {}
+        assert payload.get("result") == "pass", "receipt result is not pass"
+        assert payload.get("model") == MODEL_NAME, "receipt model mismatch"
+        assert payload.get("nonce") == nonce, "receipt nonce mismatch"
 
-    # Optional sanity: if upstream carries nonce, it should match
-    upstream_nonce = upstream_att.get("request_nonce") or upstream_att.get("nonce")
-    if upstream_nonce is not None:
-        assert upstream_nonce == nonce, "upstream nonce does not match request nonce"
+        print("[OK] /v1/attestation/chain proxy mode validated")
 
-    print("[OK] /v1/attestation/chain validated")
     print("nonce:", nonce)
     print("model:", MODEL_NAME)
     print("signing_algo:", SIGNING_ALGO)
-    print("binding_signer:", recovered_signer)
-    print("upstream_attestation_sha256:", upstream_hash)
+    print("verify_mode:", VERIFY_MODE)
 
 
 if __name__ == "__main__":

From f1abb378924668114a60bfe4f75796e625aa295f Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 15:15:18 +0800
Subject: [PATCH 14/22] fix: tighten chutes proxy verification parity checks

---
 src/app/api/v1/openai.py      |  9 +++++++--
 tests/app/test_openai_e2ee.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 814589e..e4be1f5 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -386,7 +386,7 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce
 
 
 def _decode_quote(quote_b64: str) -> bytes:
-    return base64.b64decode(quote_b64)
+    return base64.b64decode(quote_b64, validate=True)
 
 
 def _extract_td_attributes(quote_bytes: bytes) -> int:
@@ -432,7 +432,12 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
     except Exception:
         return ["invalid_quote_base64"]
 
-    tdx_result = (attestation.get("tdx_verification") or {}).get("result") or {}
+    tdx_verification = attestation.get("tdx_verification") or {}
+    tdx_error = tdx_verification.get("error")
+    if tdx_error:
+        errors.append("tdx_online_verification_error")
+
+    tdx_result = tdx_verification.get("result") or {}
     tdx_status = tdx_result.get("status")
     if tdx_status != "UpToDate":
         errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}")
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 1ee3607..f520125 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -497,6 +497,39 @@ def test_attestation_chain_nonce_too_short():
     assert response.json()["error"]["type"] == "invalid_nonce"
 
 
+def test_attestation_chain_proxy_mode_rejects_tdx_online_verification_error():
+    model = "moonshotai/Kimi-K2.5-TEE"
+    nonce = "c" * 16
+
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch(
+        "app.api.v1.openai._fetch_chutes_attestation",
+        return_value=(
+            {
+                "attestation_type": "chutes",
+                "nonce": nonce,
+                "chute_id": "chute-123",
+                "all_attestations": [
+                    {
+                        "instance_id": "inst-1",
+                        "e2e_pubkey": "pk-1",
+                        "intel_quote": _make_chutes_quote_b64(nonce, "pk-1"),
+                        "tdx_verification": {"error": "upstream verifier timeout", "result": {"status": "UpToDate"}},
+                    }
+                ],
+            },
+            None,
+        ),
+    ):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 502
+    assert response.json()["error"]["type"] == "chutes_verification_failed"
+
+
 def test_attestation_chain_invalid_verify_mode():
     with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
         response = client.get(

From ac1e3ca62df2328c5285395ef55a059eef45f438 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 17:01:35 +0800
Subject: [PATCH 15/22] fix: align proxy mode with optional chutes tdx status
 field

---
 src/app/api/v1/openai.py      |  9 ++++----
 tests/app/test_openai_e2ee.py | 43 +++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index e4be1f5..ba69d6f 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -437,10 +437,11 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
     if tdx_error:
         errors.append("tdx_online_verification_error")
 
-    tdx_result = tdx_verification.get("result") or {}
-    tdx_status = tdx_result.get("status")
-    if tdx_status != "UpToDate":
-        errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}")
+    tdx_result = tdx_verification.get("result")
+    if tdx_result:
+        tdx_status = tdx_result.get("status")
+        if tdx_status != "UpToDate":
+            errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}")
 
     try:
         td_attributes = _extract_td_attributes(quote_bytes)
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index f520125..7b0e3ff 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -497,6 +497,49 @@ def test_attestation_chain_nonce_too_short():
     assert response.json()["error"]["type"] == "invalid_nonce"
 
 
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_attestation_chain_proxy_mode_allows_missing_tdx_result_if_other_checks_pass(respx_mock):
+    model = "moonshotai/Kimi-K2.5-TEE"
+    nonce = "d" * 16
+    e2e_pubkey = "pk-3"
+    quote_b64 = _make_chutes_quote_b64(nonce, e2e_pubkey)
+
+    respx_mock.get("https://api.chutes.ai/chutes/").mock(
+        return_value=httpx.Response(200, json={"items": [{"chute_id": "chute-777"}]})
+    )
+    respx_mock.get("https://api.chutes.ai/e2e/instances/chute-777").mock(
+        return_value=httpx.Response(
+            200,
+            json={"instances": [{"instance_id": "inst-3", "e2e_pubkey": e2e_pubkey}]},
+        )
+    )
+    respx_mock.get("https://api.chutes.ai/chutes/chute-777/evidence").mock(
+        return_value=httpx.Response(
+            200,
+            json={
+                "evidence": [
+                    {
+                        "instance_id": "inst-3",
+                        "quote": quote_b64,
+                        "certificate": "cert",
+                    }
+                ]
+            },
+        )
+    )
+
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+        response = client.get(
+            "/v1/attestation/chain",
+            params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
+            headers={"Authorization": TEST_AUTH_HEADER},
+        )
+
+    assert response.status_code == 200
+    assert response.json()["verification_receipt"]["payload"]["result"] == "pass"
+
+
 def test_attestation_chain_proxy_mode_rejects_tdx_online_verification_error():
     model = "moonshotai/Kimi-K2.5-TEE"
     nonce = "c" * 16

From dc3242e46b4788ccdbce3be9f893fe7cef978cde Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 17:26:32 +0800
Subject: [PATCH 16/22] feat: enrich proxy attestation receipt and test both
 verify modes

---
 src/app/api/v1/openai.py              | 63 +++++++++++++++++++++------
 tests/app/test_openai_e2ee.py         |  4 ++
 tests/app/verify_attestation_chain.py | 51 ++++++++++++++--------
 3 files changed, 87 insertions(+), 31 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index ba69d6f..1ed51a3 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -417,34 +417,49 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any:
     return attestation.get("gpu_evidence")
 
 
-def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> list[str]:
+def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]:
     errors: list[str] = []
+    detail: dict[str, Any] = {
+        "instance_id": attestation.get("instance_id"),
+        "tdx_status": "missing",
+        "tdx_error_present": False,
+        "debug_mode_disabled": False,
+        "binding_verified": False,
+        "gpu_token_checked": False,
+        "gpu_verified": None,
+    }
 
     quote_b64 = attestation.get("intel_quote") or attestation.get("quote")
     e2e_pubkey = attestation.get("e2e_pubkey")
     if not quote_b64:
-        return ["missing_intel_quote"]
+        detail["errors"] = ["missing_intel_quote"]
+        return detail
     if not e2e_pubkey:
-        return ["missing_e2e_pubkey"]
+        detail["errors"] = ["missing_e2e_pubkey"]
+        return detail
 
     try:
         quote_bytes = _decode_quote(quote_b64)
     except Exception:
-        return ["invalid_quote_base64"]
+        detail["errors"] = ["invalid_quote_base64"]
+        return detail
 
     tdx_verification = attestation.get("tdx_verification") or {}
     tdx_error = tdx_verification.get("error")
     if tdx_error:
+        detail["tdx_error_present"] = True
         errors.append("tdx_online_verification_error")
 
     tdx_result = tdx_verification.get("result")
     if tdx_result:
-        tdx_status = tdx_result.get("status")
+        tdx_status = tdx_result.get("status") or "missing"
+        detail["tdx_status"] = tdx_status
         if tdx_status != "UpToDate":
-            errors.append(f"tdx_status_not_uptodate:{tdx_status or 'missing'}")
+            errors.append(f"tdx_status_not_uptodate:{tdx_status}")
 
     try:
         td_attributes = _extract_td_attributes(quote_bytes)
+        detail["debug_mode_disabled"] = not (td_attributes & 1)
         if td_attributes & 1:
             errors.append("tdx_debug_mode_enabled")
     except Exception:
@@ -452,29 +467,42 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
 
     expected_report_data = sha256((nonce + e2e_pubkey).encode("utf-8")).hexdigest().lower()
     actual_report_data = _extract_report_data_sha256(quote_bytes)
+    detail["expected_report_data"] = expected_report_data
+    detail["actual_report_data"] = actual_report_data
     if actual_report_data != expected_report_data:
         errors.append("report_data_binding_mismatch")
+    else:
+        detail["binding_verified"] = True
 
     gpu_tokens = _extract_gpu_tokens(attestation)
     if isinstance(gpu_tokens, dict):
         if gpu_tokens.get("error"):
+            detail["gpu_token_checked"] = True
+            detail["gpu_verified"] = False
             errors.append("gpu_tokens_error")
         tokens = gpu_tokens.get("tokens")
         if tokens:
+            detail["gpu_token_checked"] = True
             try:
                 platform_entry = tokens[0]
                 if not isinstance(platform_entry, list) or len(platform_entry) < 2:
+                    detail["gpu_verified"] = False
                     errors.append("gpu_platform_token_format_invalid")
                 else:
                     platform_claims = _decode_jwt_payload_without_verification(platform_entry[1])
-                    if platform_claims.get("x-nvidia-overall-att-result") is not True:
+                    overall_ok = platform_claims.get("x-nvidia-overall-att-result") is True
+                    nonce_ok = platform_claims.get("eat_nonce") == expected_report_data
+                    detail["gpu_verified"] = overall_ok and nonce_ok
+                    if not overall_ok:
                         errors.append("gpu_overall_attestation_failed")
-                    if platform_claims.get("eat_nonce") != expected_report_data:
+                    if not nonce_ok:
                         errors.append("gpu_eat_nonce_mismatch")
             except Exception:
+                detail["gpu_verified"] = False
                 errors.append("gpu_tokens_parse_failed")
 
-    return errors
+    detail["errors"] = errors
+    return detail
 
 
 def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]:
@@ -485,11 +513,10 @@ def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce:
 
     all_ok = True
     for att in attestations:
-        instance_id = att.get("instance_id")
-        att_errors = _verify_single_chutes_attestation(att, nonce)
-        if att_errors:
+        att_detail = _verify_single_chutes_attestation(att, nonce)
+        if att_detail.get("errors"):
             all_ok = False
-        details.append({"instance_id": instance_id, "errors": att_errors})
+        details.append(att_detail)
 
     return all_ok, details
 
@@ -611,6 +638,10 @@ async def attestation_chain(
             type="chutes_verification_failed",
         )
 
+    total_instances = len(verification_details)
+    uptodate_instances = sum(1 for d in verification_details if d.get("tdx_status") == "UpToDate")
+    binding_verified_instances = sum(1 for d in verification_details if d.get("binding_verified") is True)
+
     receipt_payload = {
         "nonce": nonce,
         "request_hash": sha256(f"{model}:{nonce}".encode("utf-8")).hexdigest(),
@@ -625,6 +656,12 @@ async def attestation_chain(
         "binding_signing_address": binding_proof["signing_address"],
         "verified_at": int(time.time()),
         "result": "pass",
+        "verification_summary": {
+            "total_instances": total_instances,
+            "tdx_uptodate_instances": uptodate_instances,
+            "binding_verified_instances": binding_verified_instances,
+        },
+        "instance_results": verification_details,
     }
     receipt_text = json.dumps(receipt_payload, sort_keys=True, separators=(",", ":"))
 
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 7b0e3ff..9cb87e3 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -400,6 +400,10 @@ async def test_attestation_chain_success_proxy_mode(respx_mock):
     assert "verification_receipt" in data
     assert data["verification_receipt"]["payload"]["result"] == "pass"
     assert data["verification_receipt"]["payload"]["model"] == model
+    summary = data["verification_receipt"]["payload"]["verification_summary"]
+    assert summary["total_instances"] == 1
+    assert summary["binding_verified_instances"] == 1
+    assert len(data["verification_receipt"]["payload"]["instance_results"]) == 1
 
 
 @pytest.mark.asyncio
diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index ad03f5f..d4ac426 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -15,7 +15,7 @@
 CONNECT_TIMEOUT = int(os.environ.get("CONNECT_TIMEOUT", "15"))
 READ_TIMEOUT = int(os.environ.get("READ_TIMEOUT", "300"))
 MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3"))
-VERIFY_MODE = os.environ.get("VERIFY_MODE", "proxy").lower()
+VERIFY_MODE = os.environ.get("VERIFY_MODE", "both").lower()
 
 
 def _canonical_json(obj) -> str:
@@ -53,13 +53,8 @@ def _verify_binding_signature(binding_proof: dict):
     raise RuntimeError(f"unsupported signing_algo: {signing_algo}")
 
 
-def main():
-    if not BASE_URL or not API_KEY or not MODEL_NAME:
-        raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME")
-
-    # Use 32-byte hex nonce to match attestation implementations that expect hex challenge.
+def _run_mode(verify_mode: str):
     nonce = secrets.token_hex(32)
-
     url = f"{BASE_URL}/v1/attestation/chain"
 
     resp = None
@@ -67,7 +62,7 @@ def main():
         try:
             resp = requests.get(
                 url,
-                params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO, "verify_mode": VERIFY_MODE},
+                params={"model": MODEL_NAME, "nonce": nonce, "signing_algo": SIGNING_ALGO, "verify_mode": verify_mode},
                 headers={"Authorization": f"Bearer {API_KEY}"},
                 timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
             )
@@ -76,23 +71,23 @@ def main():
                 raise RuntimeError(
                     f"Read timeout after {MAX_RETRIES} attempts (connect={CONNECT_TIMEOUT}s, read={READ_TIMEOUT}s)."
                 )
-            print(f"attempt {attempt}/{MAX_RETRIES} timed out, retrying...")
+            print(f"[{verify_mode}] attempt {attempt}/{MAX_RETRIES} timed out, retrying...")
             continue
 
-        print("status:", resp.status_code)
+        print(f"[{verify_mode}] status:", resp.status_code)
         if resp.status_code == 429:
             retry_after = resp.headers.get("Retry-After")
-            print("body:", resp.text)
+            print(f"[{verify_mode}] body:", resp.text)
             if attempt == MAX_RETRIES:
                 raise RuntimeError(
                     "429 from /v1/attestation/chain (likely upstream Chutes attestation rate limit). "
                     f"Retry-After={retry_after}."
                 )
-            print(f"attempt {attempt}/{MAX_RETRIES} got 429, retrying...")
+            print(f"[{verify_mode}] attempt {attempt}/{MAX_RETRIES} got 429, retrying...")
             continue
 
         if resp.status_code >= 400:
-            print("body:", resp.text)
+            print(f"[{verify_mode}] body:", resp.text)
         resp.raise_for_status()
         break
 
@@ -100,10 +95,8 @@ def main():
         raise RuntimeError("No response received")
 
     data = resp.json()
-
-    # Top-level structure checks
     assert data.get("version") == "1", "unexpected chain version"
-    assert data.get("verify_mode") == VERIFY_MODE, f"unexpected verify_mode: {data.get('verify_mode')}"
+    assert data.get("verify_mode") == verify_mode, f"unexpected verify_mode: {data.get('verify_mode')}"
     assert "proxy" in data, "missing proxy section"
 
     proxy = data["proxy"]
@@ -111,7 +104,7 @@ def main():
     assert proxy.get("signing_public_key"), "missing proxy.signing_public_key"
     assert proxy_att.get("signing_public_key") == proxy["signing_public_key"], "proxy signing_public_key mismatch"
 
-    if VERIFY_MODE == "passthrough":
+    if verify_mode == "passthrough":
         assert "upstream" in data and "binding_proof" in data, "missing passthrough sections"
         upstream = data["upstream"]
         upstream_att = upstream["attestation"]
@@ -145,12 +138,34 @@ def main():
         assert payload.get("model") == MODEL_NAME, "receipt model mismatch"
         assert payload.get("nonce") == nonce, "receipt nonce mismatch"
 
+        summary = payload.get("verification_summary") or {}
+        instance_results = payload.get("instance_results") or []
+        assert summary.get("total_instances") == len(instance_results), "summary total_instances mismatch"
+        assert summary.get("binding_verified_instances", 0) >= 1, "expected at least one binding-verified instance"
+
         print("[OK] /v1/attestation/chain proxy mode validated")
+        print("verification_summary:", json.dumps(summary, ensure_ascii=False))
+        print("instance_results_preview:", json.dumps(instance_results[:2], ensure_ascii=False))
 
     print("nonce:", nonce)
     print("model:", MODEL_NAME)
     print("signing_algo:", SIGNING_ALGO)
-    print("verify_mode:", VERIFY_MODE)
+    print("verify_mode:", verify_mode)
+
+
+def main():
+    if not BASE_URL or not API_KEY or not MODEL_NAME:
+        raise RuntimeError("Please set BASE_URL, API_KEY, MODEL_NAME")
+
+    if VERIFY_MODE == "both":
+        _run_mode("proxy")
+        _run_mode("passthrough")
+        return
+
+    if VERIFY_MODE not in {"proxy", "passthrough"}:
+        raise RuntimeError("VERIFY_MODE must be one of: proxy, passthrough, both")
+
+    _run_mode(VERIFY_MODE)
 
 
 if __name__ == "__main__":

From 5eb1e20f1cf5a118aa180bfeb1ee78f742e7426a Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 20:13:53 +0800
Subject: [PATCH 17/22] fix: make chain verifier script schema-tolerant for
 summary fields

---
 tests/app/verify_attestation_chain.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index d4ac426..46f3bed 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -140,11 +140,21 @@ def _run_mode(verify_mode: str):
 
         summary = payload.get("verification_summary") or {}
         instance_results = payload.get("instance_results") or []
-        assert summary.get("total_instances") == len(instance_results), "summary total_instances mismatch"
-        assert summary.get("binding_verified_instances", 0) >= 1, "expected at least one binding-verified instance"
+
+        summary_total = summary.get("total_instances")
+        if summary_total is not None and instance_results:
+            assert summary_total == len(instance_results), (
+                f"summary total_instances mismatch: summary={summary_total}, instance_results={len(instance_results)}"
+            )
+        elif summary_total is not None and not instance_results:
+            print("[WARN] verification_summary exists but instance_results missing/empty. This may indicate an older server build.")
+
+        if instance_results:
+            assert summary.get("binding_verified_instances", 0) >= 1, "expected at least one binding-verified instance"
 
         print("[OK] /v1/attestation/chain proxy mode validated")
         print("verification_summary:", json.dumps(summary, ensure_ascii=False))
+        print("instance_results_count:", len(instance_results))
         print("instance_results_preview:", json.dumps(instance_results[:2], ensure_ascii=False))
 
     print("nonce:", nonce)

From f5b94970338870748f175732b2bab237c6f96d16 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 21:01:30 +0800
Subject: [PATCH 18/22] feat: enforce online TDX verification in proxy mode

---
 src/app/api/v1/openai.py              | 32 +++++++++++++++++++++++++--
 tests/app/test_openai_e2ee.py         | 15 ++++++++-----
 tests/app/verify_attestation_chain.py |  2 +-
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index 1ed51a3..f545e66 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -1,7 +1,9 @@
+import asyncio
 import base64
 import json
 import os
 import time
+from concurrent.futures import ThreadPoolExecutor
 from hashlib import sha256
 from typing import Any, Optional
 
@@ -417,6 +419,30 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any:
     return attestation.get("gpu_evidence")
 
 
+def _verify_tdx_online(quote_b64: str) -> dict[str, Any]:
+    try:
+        import dcap_qvl
+
+        quote_bytes = _decode_quote(quote_b64)
+
+        def run_verification():
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                return loop.run_until_complete(dcap_qvl.get_collateral_and_verify(quote_bytes))
+            finally:
+                loop.close()
+
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(run_verification)
+            verified_report = future.result(timeout=30)
+
+        result = json.loads(verified_report.to_json())
+        return {"result": result, "error": None}
+    except Exception as exc:
+        return {"result": None, "error": str(exc)}
+
+
 def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]:
     errors: list[str] = []
     detail: dict[str, Any] = {
@@ -444,14 +470,16 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
         detail["errors"] = ["invalid_quote_base64"]
         return detail
 
-    tdx_verification = attestation.get("tdx_verification") or {}
+    tdx_verification = _verify_tdx_online(quote_b64)
     tdx_error = tdx_verification.get("error")
     if tdx_error:
         detail["tdx_error_present"] = True
         errors.append("tdx_online_verification_error")
 
     tdx_result = tdx_verification.get("result")
-    if tdx_result:
+    if not tdx_result:
+        errors.append("tdx_status_missing")
+    else:
         tdx_status = tdx_result.get("status") or "missing"
         detail["tdx_status"] = tdx_status
         if tdx_status != "UpToDate":
diff --git a/tests/app/test_openai_e2ee.py b/tests/app/test_openai_e2ee.py
index 9cb87e3..441e0dd 100644
--- a/tests/app/test_openai_e2ee.py
+++ b/tests/app/test_openai_e2ee.py
@@ -385,7 +385,10 @@ async def test_attestation_chain_success_proxy_mode(respx_mock):
         )
     )
 
-    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch(
+        "app.api.v1.openai._verify_tdx_online",
+        return_value={"result": {"status": "UpToDate"}, "error": None},
+    ):
         response = client.get(
             "/v1/attestation/chain",
             params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
@@ -503,7 +506,7 @@ def test_attestation_chain_nonce_too_short():
 
 @pytest.mark.asyncio
 @pytest.mark.respx
-async def test_attestation_chain_proxy_mode_allows_missing_tdx_result_if_other_checks_pass(respx_mock):
+async def test_attestation_chain_proxy_mode_uses_online_tdx_verification(respx_mock):
     model = "moonshotai/Kimi-K2.5-TEE"
     nonce = "d" * 16
     e2e_pubkey = "pk-3"
@@ -533,7 +536,10 @@ async def test_attestation_chain_proxy_mode_allows_missing_tdx_result_if_other_c
         )
     )
 
-    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"):
+    with patch("app.api.v1.openai.CHUTES_ENABLED", True), patch("app.api.v1.openai.CHUTES_API_KEY", "test-key"), patch(
+        "app.api.v1.openai._verify_tdx_online",
+        return_value={"result": {"status": "UpToDate"}, "error": None},
+    ):
         response = client.get(
             "/v1/attestation/chain",
             params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
@@ -560,13 +566,12 @@ def test_attestation_chain_proxy_mode_rejects_tdx_online_verification_error():
                         "instance_id": "inst-1",
                         "e2e_pubkey": "pk-1",
                         "intel_quote": _make_chutes_quote_b64(nonce, "pk-1"),
-                        "tdx_verification": {"error": "upstream verifier timeout", "result": {"status": "UpToDate"}},
                     }
                 ],
             },
             None,
         ),
-    ):
+    ), patch("app.api.v1.openai._verify_tdx_online", return_value={"result": None, "error": "tdx verify failed"}):
         response = client.get(
             "/v1/attestation/chain",
             params={"model": model, "nonce": nonce, "signing_algo": "ecdsa"},
diff --git a/tests/app/verify_attestation_chain.py b/tests/app/verify_attestation_chain.py
index 46f3bed..a99d853 100644
--- a/tests/app/verify_attestation_chain.py
+++ b/tests/app/verify_attestation_chain.py
@@ -155,7 +155,7 @@ def _run_mode(verify_mode: str):
         print("[OK] /v1/attestation/chain proxy mode validated")
         print("verification_summary:", json.dumps(summary, ensure_ascii=False))
         print("instance_results_count:", len(instance_results))
-        print("instance_results_preview:", json.dumps(instance_results[:2], ensure_ascii=False))
+        print("instance_results:", json.dumps(instance_results, ensure_ascii=False))
 
     print("nonce:", nonce)
     print("model:", MODEL_NAME)

From 0d20af95ebd93108240e677f6e36fb7fa4bcd682 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Thu, 2 Apr 2026 21:14:39 +0800
Subject: [PATCH 19/22] chore: add dcap-qvl dependency for online TDX
 verification

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 3b6d536..0fdcbea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ dstack-sdk==0.5.3
 cryptography==43.0.1
 redis==5.2.1
 nv-ppcie-verifier==1.5.0
+dcap-qvl>=0.3.13

From fc33de4ae71c627a533371d8427c9eeff8cff17d Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Fri, 3 Apr 2026 11:42:23 +0800
Subject: [PATCH 20/22] chore: refine online TDX verification and unify
 upstream error handling

---
 src/app/api/v1/openai.py | 41 +++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index f545e66..e2fd2b7 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -62,6 +62,9 @@
 CHUTES_CHUTE_ID_CACHE: dict[str, tuple[str, float]] = {}
 CHUTES_CHUTE_ID_CACHE_TTL_SECONDS = int(os.getenv("CHUTES_CHUTE_ID_CACHE_TTL_SECONDS", "3600"))
 
+# Shared executor for online TDX verification to avoid per-attestation thread creation.
+TDX_EXECUTOR = ThreadPoolExecutor(max_workers=int(os.getenv("TDX_ONLINE_WORKERS", "4")))
+
 TIMEOUT = 60 * 10
 
 COMMON_HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
@@ -314,7 +317,11 @@ async def _resolve_chute_id(client: httpx.AsyncClient, model: str) -> str | dict
     if resp.status_code == 429:
         return _error_from_upstream_429(resp)
     if resp.status_code != 200:
-        raise HTTPException(status_code=resp.status_code, detail=resp.text)
+        return error(
+            status_code=502,
+            message=f"Failed to lookup chute by name: {resp.status_code} {resp.text}",
+            type="upstream_http_error",
+        )
 
     data = resp.json()
     items = data.get("items") or []
@@ -339,7 +346,11 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce
     if e2e_resp.status_code == 429:
         return None, _error_from_upstream_429(e2e_resp)
     if e2e_resp.status_code != 200:
-        raise HTTPException(status_code=e2e_resp.status_code, detail=e2e_resp.text)
+        return None, error(
+            status_code=502,
+            message=f"Failed to fetch E2E public keys: {e2e_resp.status_code} {e2e_resp.text}",
+            type="upstream_http_error",
+        )
 
     e2e_data = e2e_resp.json()
     instances = e2e_data.get("instances") or []
@@ -352,7 +363,11 @@ async def _fetch_chutes_attestation(client: httpx.AsyncClient, model: str, nonce
     if evidence_resp.status_code == 429:
         return None, _error_from_upstream_429(evidence_resp)
     if evidence_resp.status_code != 200:
-        raise HTTPException(status_code=evidence_resp.status_code, detail=evidence_resp.text)
+        return None, error(
+            status_code=502,
+            message=f"Failed to fetch evidence: {evidence_resp.status_code} {evidence_resp.text}",
+            type="upstream_http_error",
+        )
 
     evidence_data = evidence_resp.json()
     evidence_list = evidence_data.get("evidence") or []
@@ -419,13 +434,17 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any:
     return attestation.get("gpu_evidence")
 
 
-def _verify_tdx_online(quote_b64: str) -> dict[str, Any]:
+def _verify_tdx_online(quote: str | bytes) -> dict[str, Any]:
+    """Run online TDX verification via dcap_qvl in a shared thread pool.
+
+    Accepts either base64-encoded quote (str) or raw bytes.
+    """
     try:
         import dcap_qvl
 
-        quote_bytes = _decode_quote(quote_b64)
+        quote_bytes = _decode_quote(quote) if isinstance(quote, str) else quote
 
-        def run_verification():
+        def run_verification() -> Any:
             loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)
             try:
@@ -433,9 +452,8 @@ def run_verification():
             finally:
                 loop.close()
 
-        with ThreadPoolExecutor(max_workers=1) as executor:
-            future = executor.submit(run_verification)
-            verified_report = future.result(timeout=30)
+        future = TDX_EXECUTOR.submit(run_verification)
+        verified_report = future.result(timeout=30)
 
         result = json.loads(verified_report.to_json())
         return {"result": result, "error": None}
@@ -470,7 +488,7 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
         detail["errors"] = ["invalid_quote_base64"]
         return detail
 
-    tdx_verification = _verify_tdx_online(quote_b64)
+    tdx_verification = _verify_tdx_online(quote_bytes)
     tdx_error = tdx_verification.get("error")
     if tdx_error:
         detail["tdx_error_present"] = True
@@ -478,7 +496,8 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
 
     tdx_result = tdx_verification.get("result")
     if not tdx_result:
-        errors.append("tdx_status_missing")
+        if not tdx_error:
+            errors.append("tdx_status_missing")
     else:
         tdx_status = tdx_result.get("status") or "missing"
         detail["tdx_status"] = tdx_status

From 771d73b9ea87809ce4eaf0fda1323776b54636fb Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Fri, 3 Apr 2026 12:31:49 +0800
Subject: [PATCH 21/22] chutes: use 8 TDX online workers by default

---
 src/app/api/v1/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index e2fd2b7..bef5ac0 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -63,7 +63,7 @@
 CHUTES_CHUTE_ID_CACHE_TTL_SECONDS = int(os.getenv("CHUTES_CHUTE_ID_CACHE_TTL_SECONDS", "3600"))
 
 # Shared executor for online TDX verification to avoid per-attestation thread creation.
-TDX_EXECUTOR = ThreadPoolExecutor(max_workers=int(os.getenv("TDX_ONLINE_WORKERS", "4")))
+TDX_EXECUTOR = ThreadPoolExecutor(max_workers=int(os.getenv("TDX_ONLINE_WORKERS", "8")))
 
 TIMEOUT = 60 * 10
 

From 171d089b9c82d36730c56008e78af9ea05bebcd5 Mon Sep 17 00:00:00 2001
From: mondaylord <leon@phala.network>
Date: Fri, 3 Apr 2026 12:39:10 +0800
Subject: [PATCH 22/22] chutes: async TDX verification and trimmed error
 message

---
 src/app/api/v1/openai.py | 55 ++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/src/app/api/v1/openai.py b/src/app/api/v1/openai.py
index bef5ac0..ce543d7 100644
--- a/src/app/api/v1/openai.py
+++ b/src/app/api/v1/openai.py
@@ -435,7 +435,7 @@ def _extract_gpu_tokens(attestation: dict[str, Any]) -> Any:
 
 
 def _verify_tdx_online(quote: str | bytes) -> dict[str, Any]:
-    """Run online TDX verification via dcap_qvl in a shared thread pool.
+    """Run online TDX verification via dcap_qvl synchronously.
 
     Accepts either base64-encoded quote (str) or raw bytes.
     """
@@ -444,16 +444,12 @@ def _verify_tdx_online(quote: str | bytes) -> dict[str, Any]:
 
         quote_bytes = _decode_quote(quote) if isinstance(quote, str) else quote
 
-        def run_verification() -> Any:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                return loop.run_until_complete(dcap_qvl.get_collateral_and_verify(quote_bytes))
-            finally:
-                loop.close()
-
-        future = TDX_EXECUTOR.submit(run_verification)
-        verified_report = future.result(timeout=30)
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            verified_report = loop.run_until_complete(dcap_qvl.get_collateral_and_verify(quote_bytes))
+        finally:
+            loop.close()
 
         result = json.loads(verified_report.to_json())
         return {"result": result, "error": None}
@@ -461,7 +457,17 @@ def run_verification() -> Any:
         return {"result": None, "error": str(exc)}
 
 
-def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]:
+async def _verify_tdx_online_async(quote: str | bytes) -> dict[str, Any]:
+    """Async wrapper that runs TDX verification in the shared executor.
+
+    This keeps the event loop responsive while using a bounded thread pool
+    for the heavy verification work.
+    """
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(TDX_EXECUTOR, _verify_tdx_online, quote)
+
+
+async def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -> dict[str, Any]:
     errors: list[str] = []
     detail: dict[str, Any] = {
         "instance_id": attestation.get("instance_id"),
@@ -488,7 +494,7 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
         detail["errors"] = ["invalid_quote_base64"]
         return detail
 
-    tdx_verification = _verify_tdx_online(quote_bytes)
+    tdx_verification = await _verify_tdx_online_async(quote_bytes)
     tdx_error = tdx_verification.get("error")
     if tdx_error:
         detail["tdx_error_present"] = True
@@ -552,19 +558,20 @@ def _verify_single_chutes_attestation(attestation: dict[str, Any], nonce: str) -
     return detail
 
 
-def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]:
+async def _verify_chutes_attestation_bundle(attestation_bundle: dict[str, Any], nonce: str) -> tuple[bool, list[dict[str, Any]]]:
     details: list[dict[str, Any]] = []
     attestations = attestation_bundle.get("all_attestations") or []
     if not attestations:
         return False, [{"instance_id": None, "errors": ["missing_all_attestations"]}]
 
-    all_ok = True
-    for att in attestations:
-        att_detail = _verify_single_chutes_attestation(att, nonce)
-        if att_detail.get("errors"):
-            all_ok = False
-        details.append(att_detail)
+    # Verify all instances concurrently, bounded by TDX_EXECUTOR size.
+    tasks = [
+        _verify_single_chutes_attestation(att, nonce)
+        for att in attestations
+    ]
+    details = await asyncio.gather(*tasks)
 
+    all_ok = all(not d.get("errors") for d in details)
     return all_ok, details
 
 
@@ -677,11 +684,15 @@ async def attestation_chain(
             "binding_proof": binding_proof,
         }
 
-    verified, verification_details = _verify_chutes_attestation_bundle(upstream_attestation, nonce)
+    verified, verification_details = await _verify_chutes_attestation_bundle(upstream_attestation, nonce)
     if not verified:
+        log.error(
+            "Chutes attestation verification failed in proxy mode: %s",
+            json.dumps(verification_details, separators=(",", ":")),
+        )
         return error(
             status_code=502,
-            message=f"Chutes attestation verification failed in proxy mode: {json.dumps(verification_details, separators=(',', ':'))}",
+            message="Chutes attestation verification failed in proxy mode",
             type="chutes_verification_failed",
         )