diff --git a/README.md b/README.md
index ab7eba7..111e85b 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,7 @@ All flags go after `chatmock serve`. These can also be set as environment variab
 | `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags | think-tags | How reasoning is returned to the client |
 | `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
 | `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--no-base-instructions` | `CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS` | true/false | false | Do not inject ChatMock's default Codex instructions when a request omits instructions |
 | `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
 
 <details>
diff --git a/chatmock/app.py b/chatmock/app.py
index e4541dc..04d7d32 100644
--- a/chatmock/app.py
+++ b/chatmock/app.py
@@ -20,6 +20,7 @@ def create_app(
     debug_model: str | None = None,
     expose_reasoning_models: bool = False,
     default_web_search: bool = False,
+    no_base_instructions: bool = False,
 ) -> Flask:
     app = Flask(__name__)
 
@@ -35,6 +36,7 @@ def create_app(
         GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
         EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
         DEFAULT_WEB_SEARCH=bool(default_web_search),
+        NO_BASE_INSTRUCTIONS=bool(no_base_instructions),
     )
 
     @app.get("/")
diff --git a/chatmock/cli.py b/chatmock/cli.py
index 9e58195..2a1d618 100644
--- a/chatmock/cli.py
+++ b/chatmock/cli.py
@@ -233,6 +233,7 @@ def cmd_serve(
     debug_model: str | None,
     expose_reasoning_models: bool,
     default_web_search: bool,
+    no_base_instructions: bool,
 ) -> int:
     app = create_app(
         verbose=verbose,
@@ -244,6 +245,7 @@ def cmd_serve(
         debug_model=debug_model,
         expose_reasoning_models=expose_reasoning_models,
         default_web_search=default_web_search,
+        no_base_instructions=no_base_instructions,
     )
 
     app.run(host=host, use_reloader=False, port=port, threaded=True)
@@ -319,6 +321,15 @@ def main() -> None:
             "Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH."
         ),
     )
+    p_serve.add_argument(
+        "--no-base-instructions",
+        action="store_true",
+        default=(os.getenv("CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Do not inject ChatMock's default Codex instructions when a request omits instructions. "
+            "Also configurable via CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS."
+        ),
+    )
 
     p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
     p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
@@ -341,6 +352,7 @@ def main() -> None:
                 debug_model=args.debug_model,
                 expose_reasoning_models=args.expose_reasoning_models,
                 default_web_search=args.enable_web_search,
+                no_base_instructions=args.no_base_instructions,
             )
         )
     elif args.command == "info":
diff --git a/chatmock/http.py b/chatmock/http.py
index 567093a..6fc7bef 100644
--- a/chatmock/http.py
+++ b/chatmock/http.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import Any
+
 from flask import Response, jsonify, request
 
 
@@ -16,9 +18,25 @@ def build_cors_headers() -> dict:
 
 
 def json_error(message: str, status: int = 400) -> Response:
-    resp = jsonify({"error": {"message": message}})
+    resp = jsonify(openai_error_payload(message))  # error envelope with message/type/param/code keys
     response: Response = Response(response=resp.response, status=status, mimetype="application/json")
     for k, v in build_cors_headers().items():
         response.headers.setdefault(k, v)
     return response
 
+
+def openai_error_payload(
+    message: str,
+    *,
+    error_type: str = "invalid_request_error",
+    param: str | None = None,
+    code: str | None = None,
+) -> dict[str, Any]:
+    """Wrap error details in an ``{"error": {...}}`` envelope.
+
+    All four keys (message, type, param, code) are always emitted; ``param``
+    and ``code`` stay ``None`` when the caller does not supply them.
+    """
+    fields = ("message", "type", "param", "code")
+    details = dict(zip(fields, (message, error_type, param, code)))
+    return {"error": details}
diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py
index 51bda2a..101903f 100644
--- a/chatmock/responses_api.py
+++ b/chatmock/responses_api.py
@@ -14,6 +14,7 @@
 )
 from .reasoning import build_reasoning_param
 from .session import ensure_session_id
+from .utils import normalize_tool_choice_for_responses
 
 
 @dataclass(frozen=True)
@@ -35,7 +36,9 @@ class NormalizedResponsesRequest:
     service_tier_resolution: ServiceTierResolution
 
 
-def instructions_for_model(config: Dict[str, Any], model: str) -> str:
+def instructions_for_model(config: Dict[str, Any], model: str) -> str | None:
+    if bool(config.get("NO_BASE_INSTRUCTIONS")):
+        return None
     base = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
     if uses_codex_instructions(model):
         codex = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
@@ -98,8 +101,11 @@ def normalize_responses_payload(
 
     instructions = normalized.get("instructions")
     if not isinstance(instructions, str) or not instructions.strip():
-        instructions = instructions_for_model(config, normalized_model)
-        normalized["instructions"] = instructions
+        if not bool(config.get("NO_BASE_INSTRUCTIONS")):
+            instructions = instructions_for_model(config, normalized_model)
+            normalized["instructions"] = instructions
+        else:
+            instructions = None
 
     reasoning_effort = config.get("REASONING_EFFORT", "medium")
     reasoning_summary = config.get("REASONING_SUMMARY", "auto")
@@ -120,6 +126,7 @@ def normalize_responses_payload(
     if "reasoning.encrypted_content" not in include_list:
         include_list.append("reasoning.encrypted_content")
     normalized["include"] = include_list
+    normalized["tool_choice"] = normalize_tool_choice_for_responses(normalized.get("tool_choice", "auto"))
 
     tools = normalized.get("tools")
     if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")):
@@ -176,23 +183,77 @@ def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
             yield evt
 
 
+def compact_response_object(response_obj: Dict[str, Any], model: str | None = None) -> Dict[str, Any]:
+    """Reduce a response to id/object/created_at/status/output/model, defaulting malformed fields."""
+    resp_id, created, output = (response_obj.get(key) for key in ("id", "created_at", "output"))
+    resolved_model = response_obj.get("model")
+    if not isinstance(resolved_model, str):
+        resolved_model = model  # fall back to the caller-supplied model name
+    slim: Dict[str, Any] = {
+        "id": resp_id if isinstance(resp_id, str) and resp_id else "resp",
+        "object": response_obj.get("object") or "response",
+        "created_at": created if isinstance(created, int) else 0,
+        "status": response_obj.get("status") or "completed",
+        "output": output if isinstance(output, list) else [],
+    }
+    return slim if resolved_model is None else {**slim, "model": resolved_model}
+
+
+def response_object_from_events(events: List[Dict[str, Any]], model: str | None = None) -> Dict[str, Any] | None:
+    """Rebuild a compact response object from a list of decoded SSE events.
+
+    The last event carrying a dict ``response`` supplies the base object. If its
+    output is empty, finished output items (sorted by ``output_index``) fill it;
+    failing that, the joined text deltas become one assistant message.
+    Returns ``None`` when no event carried a response object.
+    """
+    latest: Dict[str, Any] | None = None
+    deltas: List[str] = []
+    finished: List[tuple[int, Dict[str, Any]]] = []
+    for event in events:
+        candidate = event.get("response")
+        if isinstance(candidate, dict):
+            latest = candidate
+        event_type = event.get("type")
+        if event_type == "response.output_text.delta" and isinstance(event.get("delta"), str):
+            deltas.append(event["delta"])
+        elif event_type == "response.output_item.done" and isinstance(event.get("item"), dict):
+            position = event.get("output_index")
+            if not isinstance(position, int):
+                position = len(finished)  # no usable index: use the count collected so far
+            finished.append((position, event["item"]))
+    if latest is None:
+        return None
+    result = compact_response_object(latest, model)
+    if result.get("output"):
+        return result
+    if finished:
+        finished.sort(key=lambda pair: pair[0])
+        result["output"] = [item for _, item in finished]
+    elif deltas:
+        text_block = {"type": "output_text", "text": "".join(deltas), "annotations": []}
+        message = {"id": f"{result['id']}_msg", "type": "message", "role": "assistant", "content": [text_block]}
+        result["output"] = [message]
+    return result
+
+
 def aggregate_response_from_sse(
     upstream: Any,
     *,
     on_event: Any | None = None,
+    model: str | None = None,
 ) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
-    response_obj: Dict[str, Any] | None = None
+    events: List[Dict[str, Any]] = []
     error_obj: Dict[str, Any] | None = None
     try:
         for evt in iter_sse_event_payloads(upstream):
+            events.append(evt)
             if callable(on_event):
                 try:
                     on_event(evt)
                 except Exception:
                     pass
             response = evt.get("response")
-            if isinstance(response, dict):
-                response_obj = response
             kind = evt.get("type")
             if kind == "response.failed":
                 if isinstance(response, dict) and isinstance(response.get("error"), dict):
@@ -204,7 +265,7 @@ def aggregate_response_from_sse(
                 break
     finally:
         upstream.close()
-    return response_obj, error_obj
+    return response_object_from_events(events, model), error_obj
 
 
 def stream_upstream_bytes(
diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py
index 5da18d0..7489c3a 100644
--- a/chatmock/routes_ollama.py
+++ b/chatmock/routes_ollama.py
@@ -7,7 +7,6 @@
 
 from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
 
-from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .fast_mode import resolve_service_tier
 from .limits import record_rate_limits_from_response
 from .http import build_cors_headers
@@ -72,7 +71,7 @@ def ollama_version() -> Response:
     return resp
 
 
-def _instructions_for_model(model: str) -> str:
+def _instructions_for_model(model: str) -> str | None:  # None when NO_BASE_INSTRUCTIONS is set in app config
     return instructions_for_model(current_app.config, model)
 
 
@@ -308,7 +307,7 @@ def ollama_chat() -> Response:
             upstream2, err2 = start_upstream_request(
                 normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
                 input_items,
-                instructions=BASE_INSTRUCTIONS,
+                instructions=_instructions_for_model(model),
                 tools=base_tools_only,
                 tool_choice=safe_choice,
                 parallel_tool_calls=parallel_tool_calls,
diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py
index eb37842..088e0ac 100644
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -6,14 +6,14 @@
 
 from flask import Blueprint, Response, current_app, jsonify, make_response, request
 
-from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .fast_mode import resolve_service_tier
 from .limits import record_rate_limits_from_response
-from .http import build_cors_headers
+from .http import build_cors_headers, openai_error_payload
 from .model_registry import list_public_models, uses_codex_instructions
 from .responses_api import (
     ResponsesRequestError,
     aggregate_response_from_sse,
+    compact_response_object,
     extract_client_session_id,
     instructions_for_model,
     normalize_responses_payload,
@@ -35,6 +35,7 @@
 from .utils import (
     convert_chat_messages_to_responses_input,
     convert_tools_chat_to_responses,
+    normalize_tool_choice_for_responses,
     sse_translate_chat,
     sse_translate_text,
 )
@@ -73,7 +74,7 @@ def _gen():
     return _gen()
 
 
-def _instructions_for_model(model: str) -> str:
+def _instructions_for_model(model: str) -> str | None:  # None when NO_BASE_INSTRUCTIONS is set in app config
     return instructions_for_model(current_app.config, model)
 
 
@@ -92,7 +93,7 @@ def _service_tier_from_payload(
     if resolution.warning_message and verbose:
         print(f"[FastMode] {resolution.warning_message}")
     if resolution.error_message:
-        err = {"error": {"message": resolution.error_message}}
+        err = openai_error_payload(resolution.error_message, code="unsupported_service_tier")
         if verbose:
             _log_json("OUT POST service_tier resolution", err)
         resp = make_response(jsonify(err), 400)
@@ -122,7 +123,7 @@ def chat_completions() -> Response:
         try:
             payload = json.loads(raw.replace("\r", "").replace("\n", ""))
         except Exception:
-            err = {"error": {"message": "Invalid JSON body"}}
+            err = openai_error_payload("Invalid JSON body")
             if verbose:
                 _log_json("OUT POST /v1/chat/completions", err)
             return jsonify(err), 400
@@ -137,7 +138,7 @@ def chat_completions() -> Response:
     if messages is None:
         messages = []
     if not isinstance(messages, list):
-        err = {"error": {"message": "Request must include messages: []"}}
+        err = openai_error_payload("Request must include messages: []", param="messages")
         if verbose:
             _log_json("OUT POST /v1/chat/completions", err)
         return jsonify(err), 400
@@ -153,7 +154,7 @@ def chat_completions() -> Response:
     include_usage = bool(stream_options.get("include_usage", False))
 
     tools_responses = convert_tools_chat_to_responses(payload.get("tools"))
-    tool_choice = payload.get("tool_choice", "auto")
+    tool_choice = normalize_tool_choice_for_responses(payload.get("tool_choice", "auto"))
     parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
     responses_tools_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
     extra_tools: List[Dict[str, Any]] = []
@@ -164,10 +165,11 @@ def chat_completions() -> Response:
                 continue
             if _t.get("type") not in ("web_search", "web_search_preview"):
                 err = {
-                    "error": {
-                        "message": "Only web_search/web_search_preview are supported in responses_tools",
-                        "code": "RESPONSES_TOOL_UNSUPPORTED",
-                    }
+                    **openai_error_payload(
+                        "Only web_search/web_search_preview are supported in responses_tools",
+                        param="responses_tools",
+                        code="RESPONSES_TOOL_UNSUPPORTED",
+                    )
                 }
                 if verbose:
                     _log_json("OUT POST /v1/chat/completions", err)
@@ -187,7 +189,11 @@ def chat_completions() -> Response:
             except Exception:
                 size = 0
             if size > MAX_TOOLS_BYTES:
-                err = {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}
+                err = openai_error_payload(
+                    "responses_tools too large",
+                    param="responses_tools",
+                    code="RESPONSES_TOOLS_TOO_LARGE",
+                )
                 if verbose:
                     _log_json("OUT POST /v1/chat/completions", err)
                 return jsonify(err), 400
@@ -253,11 +259,11 @@ def chat_completions() -> Response:
             if verbose:
                 print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)")
             base_tools_only = convert_tools_chat_to_responses(payload.get("tools"))
-            safe_choice = payload.get("tool_choice", "auto")
+            safe_choice = normalize_tool_choice_for_responses(payload.get("tool_choice", "auto"))
             upstream2, err2 = start_upstream_request(
                 model,
                 input_items,
-                instructions=BASE_INSTRUCTIONS,
+                instructions=_instructions_for_model(model),
                 tools=base_tools_only,
                 tool_choice=safe_choice,
                 parallel_tool_calls=parallel_tool_calls,
@@ -269,10 +275,11 @@ def chat_completions() -> Response:
                 upstream = upstream2
             else:
                 err = {
-                    "error": {
-                        "message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
-                        "code": "RESPONSES_TOOLS_REJECTED",
-                    }
+                    **openai_error_payload(
+                        (err_body.get("error", {}) or {}).get("message", "Upstream error"),
+                        error_type="api_error",
+                        code="RESPONSES_TOOLS_REJECTED",
+                    )
                 }
                 if verbose:
                     _log_json("OUT POST /v1/chat/completions", err)
@@ -280,7 +287,10 @@ def chat_completions() -> Response:
         else:
             if verbose:
                 print("Upstream error status=", upstream.status_code)
-            err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+            err = openai_error_payload(
+                (err_body.get("error", {}) or {}).get("message", "Upstream error"),
+                error_type="api_error",
+            )
             if verbose:
                 _log_json("OUT POST /v1/chat/completions", err)
             return jsonify(err), upstream.status_code
@@ -377,7 +387,7 @@ def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
         upstream.close()
 
     if error_message:
-        resp = make_response(jsonify({"error": {"message": error_message}}), 502)
+        resp = make_response(jsonify(openai_error_payload(error_message, error_type="api_error")), 502)
         for k, v in build_cors_headers().items():
             resp.headers.setdefault(k, v)
         return resp
@@ -424,7 +434,7 @@ def completions() -> Response:
     try:
         payload = json.loads(raw) if raw else {}
     except Exception:
-        err = {"error": {"message": "Invalid JSON body"}}
+        err = openai_error_payload("Invalid JSON body")
         if verbose:
             _log_json("OUT POST /v1/completions", err)
         return jsonify(err), 400
@@ -483,7 +493,10 @@ def completions() -> Response:
             err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
         except Exception:
             err_body = {"raw": upstream.text}
-        err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+        err = openai_error_payload(
+            (err_body.get("error", {}) or {}).get("message", "Upstream error"),
+            error_type="api_error",
+        )
         if verbose:
             _log_json("OUT POST /v1/completions", err)
         return jsonify(err), upstream.status_code
@@ -584,13 +597,13 @@ def responses_create() -> Response:
     try:
         payload = json.loads(raw) if raw else {}
     except Exception:
-        err = {"error": {"message": "Invalid JSON body"}}
+        err = openai_error_payload("Invalid JSON body")
         if verbose:
             _log_json("OUT POST /v1/responses", err)
         return jsonify(err), 400
 
     if not isinstance(payload, dict):
-        err = {"error": {"message": "Request body must be a JSON object"}}
+        err = openai_error_payload("Request body must be a JSON object")
         if verbose:
             _log_json("OUT POST /v1/responses", err)
         return jsonify(err), 400
@@ -602,9 +615,7 @@ def responses_create() -> Response:
             client_session_id=extract_client_session_id(request.headers),
         )
     except ResponsesRequestError as exc:
-        err: Dict[str, Any] = {"error": {"message": str(exc)}}
-        if exc.code:
-            err["error"]["code"] = exc.code
+        err: Dict[str, Any] = openai_error_payload(str(exc), code=exc.code)
         if verbose:
             _log_json("OUT POST /v1/responses", err)
         return jsonify(err), exc.status_code
@@ -688,6 +699,7 @@ def responses_create() -> Response:
             upstream.close()
         if isinstance(body, dict):
             note_responses_final_response(normalized.session_id, body)
+            body = compact_response_object(body, normalized.requested_model or normalized.normalized_model)
             if verbose:
                 _log_json("OUT POST /v1/responses", body)
             resp = make_response(jsonify(body), upstream.status_code)
@@ -698,6 +710,7 @@ def responses_create() -> Response:
     response_obj, error_obj = aggregate_response_from_sse(
         upstream,
         on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+        model=normalized.requested_model or normalized.normalized_model,
     )
     if error_obj is not None:
         clear_responses_reuse_state(normalized.session_id)
@@ -710,7 +723,10 @@ def responses_create() -> Response:
 
     if response_obj is None:
         clear_responses_reuse_state(normalized.session_id)
-        err = {"error": {"message": "Upstream response stream did not contain a completed response object"}}
+        err = openai_error_payload(
+            "Upstream response stream did not contain a completed response object",
+            error_type="api_error",
+        )
         if verbose:
             _log_json("OUT POST /v1/responses", err)
         resp = make_response(jsonify(err), 502)
diff --git a/chatmock/upstream.py b/chatmock/upstream.py
index 48ec842..9e29f7f 100644
--- a/chatmock/upstream.py
+++ b/chatmock/upstream.py
@@ -69,7 +69,6 @@ def start_upstream_request(
 
     responses_payload = {
         "model": model,
-        "instructions": instructions if isinstance(instructions, str) and instructions.strip() else instructions,
         "input": input_items,
         "tools": tools or [],
         "tool_choice": tool_choice if tool_choice in ("auto", "none") or isinstance(tool_choice, dict) else "auto",
@@ -78,6 +77,8 @@ def start_upstream_request(
         "stream": True,
         "prompt_cache_key": session_id,
     }
+    if instructions is not None:
+        responses_payload["instructions"] = instructions
     if include:
         responses_payload["include"] = include
 
diff --git a/chatmock/utils.py b/chatmock/utils.py
index c04b997..14e0ace 100644
--- a/chatmock/utils.py
+++ b/chatmock/utils.py
@@ -247,6 +247,21 @@ def convert_tools_chat_to_responses(tools: Any) -> List[Dict[str, Any]]:
     return out
 
 
+def normalize_tool_choice_for_responses(tool_choice: Any) -> Any:
+    """Flatten function-style ``tool_choice`` dicts; unknown strings and non-dict values become "auto"."""
+    if not isinstance(tool_choice, (str, dict)):
+        return "auto"
+    if isinstance(tool_choice, str):
+        return tool_choice if tool_choice in ("auto", "none", "required") else "auto"
+    if tool_choice.get("type") != "function":
+        return tool_choice
+    nested = tool_choice.get("function")
+    # A top-level "name" wins; otherwise fall back to the name nested under "function".
+    candidates = (tool_choice.get("name"), nested.get("name") if isinstance(nested, dict) else None)
+    chosen = next((name for name in candidates if isinstance(name, str) and name), None)
+    return tool_choice if chosen is None else {"type": "function", "name": chosen}
+
+
 def load_chatgpt_tokens(
     ensure_fresh: bool = True,
     *,
diff --git a/tests/test_routes.py b/tests/test_routes.py
index c5d94bc..a7c2cbc 100644
--- a/tests/test_routes.py
+++ b/tests/test_routes.py
@@ -63,6 +63,19 @@ def test_openai_models_list(self) -> None:
         self.assertIn("gpt-5.4-mini", model_ids)
         self.assertIn("gpt-5.3-codex-spark", model_ids)
 
+    def test_openai_errors_include_standard_fields(self) -> None:
+        """A malformed chat-completions body yields a 400 with the full error envelope."""
+        request_kwargs = {"data": "{bad json", "content_type": "application/json"}
+        response = self.client.post("/v1/chat/completions", **request_kwargs)
+        body = response.get_json()
+        self.assertEqual(response.status_code, 400)
+        error = body["error"]
+        self.assertEqual(error["message"], "Invalid JSON body")
+        self.assertEqual(error["type"], "invalid_request_error")
+        # param/code only need to be present; their values are not pinned here.
+        for field in ("param", "code"):
+            self.assertIn(field, error)
+
     def test_ollama_tags_list(self) -> None:
         response = self.client.get("/api/tags")
         body = response.get_json()
@@ -214,6 +227,41 @@ def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_star
         self.assertIn("Fast mode is not supported", body["error"]["message"])
         mock_start.assert_not_called()
 
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_normalizes_official_function_tool_choice(self, mock_start) -> None:
+        """The nested Chat Completions ``tool_choice`` is flattened before the upstream call.
+
+        The request names the tool as ``{"type": "function", "function": {"name": ...}}``;
+        the mocked upstream starter must be called with ``{"type": "function", "name": ...}``.
+        """
+        upstream_events = [
+            {"type": "response.output_text.delta", "delta": "hello"},
+            {"type": "response.completed", "response": {"id": "resp-openai"}},
+        ]
+        mock_start.return_value = (FakeUpstream(upstream_events), None)
+        weather_tool = {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get weather",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+        request_body = {
+            "model": "gpt-5.4",
+            "messages": [{"role": "user", "content": "hi"}],
+            "tools": [weather_tool],
+            "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
+        }
+        expected_choice = {"type": "function", "name": "get_weather"}
+
+        response = self.client.post("/v1/chat/completions", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        # start_upstream_request is given tool_choice as a keyword argument.
+        forwarded_choice = mock_start.call_args.kwargs["tool_choice"]
+        self.assertEqual(forwarded_choice, expected_choice)
+
     @patch("chatmock.routes_openai.start_upstream_raw_request")
     def test_responses_route_returns_completed_response_object(self, mock_start) -> None:
         mock_start.return_value = (
@@ -247,6 +295,7 @@ def test_responses_route_returns_completed_response_object(self, mock_start) ->
         outbound_payload = mock_start.call_args.args[0]
         self.assertEqual(outbound_payload["model"], "gpt-5.4-mini")
         self.assertEqual(outbound_payload["store"], False)
+        self.assertIn("instructions", outbound_payload)
         self.assertEqual(
             outbound_payload["input"],
             [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}],
@@ -254,6 +303,174 @@ def test_responses_route_returns_completed_response_object(self, mock_start) ->
         self.assertEqual(outbound_payload["reasoning"]["effort"], "medium")
         self.assertIsInstance(outbound_payload["prompt_cache_key"], str)
 
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_can_skip_default_base_instructions(self, mock_start) -> None:
+        """A request without instructions forwards none when base instructions are disabled.
+
+        The app is built with ``no_base_instructions=True`` and the upstream is mocked to
+        return a single ``response.completed`` event.
+        """
+        app = create_app(no_base_instructions=True)
+        client = app.test_client()
+        completed_event = {
+            "type": "response.completed",
+            "response": {"id": "resp_no_base", "object": "response", "status": "completed", "output": []},
+        }
+        mock_start.return_value = (
+            FakeUpstream([completed_event], headers={"Content-Type": "text/event-stream"}),
+            None,
+        )
+        request_body = {"model": "gpt-5.4", "input": "hello"}
+
+        response = client.post("/v1/responses", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        # The raw upstream starter receives the outbound payload as its first positional argument.
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertNotIn("instructions", outbound_payload)
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_preserves_explicit_instructions_when_base_disabled(self, mock_start) -> None:
+        """Explicit ``instructions`` are forwarded as-is when base instructions are disabled.
+
+        The app is built with ``no_base_instructions=True``; the caller's own text must
+        still reach the outbound upstream payload.
+        """
+        app = create_app(no_base_instructions=True)
+        client = app.test_client()
+        completed_event = {
+            "type": "response.completed",
+            "response": {"id": "resp_explicit", "object": "response", "status": "completed", "output": []},
+        }
+        mock_start.return_value = (
+            FakeUpstream([completed_event], headers={"Content-Type": "text/event-stream"}),
+            None,
+        )
+        request_body = {"model": "gpt-5.4", "instructions": "custom", "input": "hello"}
+
+        response = client.post("/v1/responses", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        # The raw upstream starter receives the outbound payload as its first positional argument.
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertEqual(outbound_payload["instructions"], "custom")
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_compacts_completed_response_object(self, mock_start) -> None:
+        """Non-streaming /v1/responses returns only the compact fields of the upstream response.
+
+        Upstream-only fields (``instructions``, ``parallel_tool_calls``) are dropped, and because
+        the completed response has an empty ``output``, the text delta is folded into a message.
+        """
+        upstream_response = {
+            "id": "resp_compact",
+            "object": "response",
+            "created_at": 123,
+            "status": "completed",
+            "model": "gpt-5.4",
+            "instructions": "internal prompt",
+            "parallel_tool_calls": True,
+            "output": [],
+        }
+        events = [
+            {"type": "response.output_text.delta", "delta": "hello"},
+            {"type": "response.completed", "response": upstream_response},
+        ]
+        mock_start.return_value = (
+            FakeUpstream(events, headers={"Content-Type": "text/event-stream"}),
+            None,
+        )
+        request_body = {"model": "gpt-5.4", "input": "hello"}
+
+        response = self.client.post("/v1/responses", json=request_body)
+        body = response.get_json()
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(body["id"], "resp_compact")
+        for dropped_field in ("instructions", "parallel_tool_calls"):
+            self.assertNotIn(dropped_field, body)
+        # The message text comes from the streamed delta, since "output" above is empty.
+        message_text = body["output"][0]["content"][0]["text"]
+        self.assertEqual(message_text, "hello")
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_preserves_reasoning_summary_and_output(self, mock_start) -> None:
+        """Upstream output items survive compaction, and the requested reasoning summary is forwarded.
+
+        The completed response already carries a reasoning item and a message item, so both must
+        appear in the body while ``instructions`` is still stripped.
+        """
+        reasoning_item = {
+            "type": "reasoning",
+            "id": "rs_1",
+            "summary": [{"type": "summary_text", "text": "reasoning summary"}],
+            "encrypted_content": "encrypted",
+            "content": [],
+        }
+        message_item = {
+            "type": "message",
+            "id": "msg_1",
+            "role": "assistant",
+            "content": [{"type": "output_text", "text": "final output"}],
+        }
+        upstream_response = {
+            "id": "resp_summary",
+            "object": "response",
+            "created_at": 123,
+            "status": "completed",
+            "model": "gpt-5.4",
+            "instructions": "internal prompt",
+            "output": [reasoning_item, message_item],
+        }
+        mock_start.return_value = (
+            FakeUpstream(
+                [{"type": "response.completed", "response": upstream_response}],
+                headers={"Content-Type": "text/event-stream"},
+            ),
+            None,
+        )
+        request_body = {
+            "model": "gpt-5.4",
+            "input": "hello",
+            "reasoning": {"effort": "medium", "summary": "detailed"},
+        }
+
+        response = self.client.post("/v1/responses", json=request_body)
+        body = response.get_json()
+
+        self.assertEqual(response.status_code, 200)
+        self.assertNotIn("instructions", body)
+        self.assertEqual(body["output"][0]["summary"][0]["text"], "reasoning summary")
+        self.assertEqual(body["output"][1]["content"][0]["text"], "final output")
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertEqual(outbound_payload["reasoning"]["summary"], "detailed")
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_normalizes_official_function_tool_choice(self, mock_start) -> None:
+        """The nested function ``tool_choice`` form is flattened in the outbound /v1/responses payload.
+
+        The client sends ``{"type": "function", "function": {"name": ...}}``; the payload handed
+        to the upstream starter must carry ``{"type": "function", "name": ...}``.
+        """
+        completed_event = {
+            "type": "response.completed",
+            "response": {"id": "resp_tool_choice", "object": "response", "status": "completed", "output": []},
+        }
+        upstream = FakeUpstream([completed_event], headers={"Content-Type": "text/event-stream"})
+        mock_start.return_value = (upstream, None)
+        request_body = {
+            "model": "gpt-5.4",
+            "input": "hello",
+            "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
+        }
+        expected_choice = {"type": "function", "name": "get_weather"}
+
+        response = self.client.post("/v1/responses", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertEqual(outbound_payload["tool_choice"], expected_choice)
+
     @patch("chatmock.routes_openai.start_upstream_raw_request")
     def test_responses_route_honors_debug_model_override(self, mock_start) -> None:
         app = create_app(debug_model="gpt-5.4")
@@ -530,7 +747,10 @@ def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> Non
 
     @patch("chatmock.routes_openai.start_upstream_raw_request")
     def test_responses_route_stream_passthrough(self, mock_start) -> None:
-        chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n'
+        chunk = (
+            b'data: {"type":"response.reasoning_summary_text.delta","delta":"reasoning summary"}\n\n'
+            b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n'
+        )
         mock_start.return_value = (
             FakeUpstream(
                 headers={"Content-Type": "text/event-stream"},
@@ -543,7 +763,11 @@ def test_responses_route_stream_passthrough(self, mock_start) -> None:
             json={"model": "gpt-5.4", "input": "hello", "stream": True},
         )
         self.assertEqual(response.status_code, 200)
-        self.assertIn("response.output_text.delta", response.get_data(as_text=True))
+        text = response.get_data(as_text=True)
+        self.assertIn("response.reasoning_summary_text.delta", text)
+        self.assertIn("reasoning summary", text)
+        self.assertIn("response.output_text.delta", text)
+        self.assertIn("hello", text)
 
     @patch("chatmock.routes_openai.start_upstream_raw_request")
     def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None: