diff --git a/README.md b/README.md index ab7eba7..111e85b 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ All flags go after `chatmock serve`. These can also be set as environment variab | `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags | think-tags | How reasoning is returned to the client | | `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models | | `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web | +| `--no-base-instructions` | `CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS` | true/false | false | Do not inject ChatMock's default Codex instructions | | `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
diff --git a/chatmock/app.py b/chatmock/app.py index e4541dc..04d7d32 100644 --- a/chatmock/app.py +++ b/chatmock/app.py @@ -20,6 +20,7 @@ def create_app( debug_model: str | None = None, expose_reasoning_models: bool = False, default_web_search: bool = False, + no_base_instructions: bool = False, ) -> Flask: app = Flask(__name__) @@ -35,6 +36,7 @@ def create_app( GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS, EXPOSE_REASONING_MODELS=bool(expose_reasoning_models), DEFAULT_WEB_SEARCH=bool(default_web_search), + NO_BASE_INSTRUCTIONS=bool(no_base_instructions), ) @app.get("/") diff --git a/chatmock/cli.py b/chatmock/cli.py index 9e58195..2a1d618 100644 --- a/chatmock/cli.py +++ b/chatmock/cli.py @@ -233,6 +233,7 @@ def cmd_serve( debug_model: str | None, expose_reasoning_models: bool, default_web_search: bool, + no_base_instructions: bool, ) -> int: app = create_app( verbose=verbose, @@ -244,6 +245,7 @@ def cmd_serve( debug_model=debug_model, expose_reasoning_models=expose_reasoning_models, default_web_search=default_web_search, + no_base_instructions=no_base_instructions, ) app.run(host=host, use_reloader=False, port=port, threaded=True) @@ -319,6 +321,15 @@ def main() -> None: "Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH." ), ) + p_serve.add_argument( + "--no-base-instructions", + action="store_true", + default=(os.getenv("CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS") or "").strip().lower() in ("1", "true", "yes", "on"), + help=( + "Do not inject ChatMock's default Codex instructions when a request omits instructions. " + "Also configurable via CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS." 
+ ), + ) p_info = sub.add_parser("info", help="Print current stored tokens and derived account id") p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents") @@ -341,6 +352,7 @@ def main() -> None: debug_model=args.debug_model, expose_reasoning_models=args.expose_reasoning_models, default_web_search=args.enable_web_search, + no_base_instructions=args.no_base_instructions, ) ) elif args.command == "info": diff --git a/chatmock/http.py b/chatmock/http.py index 567093a..6fc7bef 100644 --- a/chatmock/http.py +++ b/chatmock/http.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Any + from flask import Response, jsonify, request @@ -16,9 +18,25 @@ def build_cors_headers() -> dict: def json_error(message: str, status: int = 400) -> Response: - resp = jsonify({"error": {"message": message}}) + resp = jsonify(openai_error_payload(message)) response: Response = Response(response=resp.response, status=status, mimetype="application/json") for k, v in build_cors_headers().items(): response.headers.setdefault(k, v) return response + +def openai_error_payload( + message: str, + *, + error_type: str = "invalid_request_error", + param: str | None = None, + code: str | None = None, +) -> dict[str, Any]: + return { + "error": { + "message": message, + "type": error_type, + "param": param, + "code": code, + } + } diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py index 51bda2a..101903f 100644 --- a/chatmock/responses_api.py +++ b/chatmock/responses_api.py @@ -14,6 +14,7 @@ ) from .reasoning import build_reasoning_param from .session import ensure_session_id +from .utils import normalize_tool_choice_for_responses @dataclass(frozen=True) @@ -35,7 +36,9 @@ class NormalizedResponsesRequest: service_tier_resolution: ServiceTierResolution -def instructions_for_model(config: Dict[str, Any], model: str) -> str: +def instructions_for_model(config: Dict[str, Any], model: str) -> str | None: + if 
bool(config.get("NO_BASE_INSTRUCTIONS")): + return None base = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS) if uses_codex_instructions(model): codex = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS @@ -98,8 +101,11 @@ def normalize_responses_payload( instructions = normalized.get("instructions") if not isinstance(instructions, str) or not instructions.strip(): instructions = instructions_for_model(config, normalized_model) - normalized["instructions"] = instructions + if instructions is None: + normalized.pop("instructions", None) # NO_BASE_INSTRUCTIONS: omit the field instead of forwarding a blank or non-string client value + else: + normalized["instructions"] = instructions reasoning_effort = config.get("REASONING_EFFORT", "medium") reasoning_summary = config.get("REASONING_SUMMARY", "auto") @@ -120,6 +126,7 @@ def normalize_responses_payload( if "reasoning.encrypted_content" not in include_list: include_list.append("reasoning.encrypted_content") normalized["include"] = include_list + normalized["tool_choice"] = normalize_tool_choice_for_responses(normalized.get("tool_choice", "auto")) tools = normalized.get("tools") if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")): @@ -176,23 +183,77 @@ def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]: yield evt +def compact_response_object(response_obj: Dict[str, Any], model: str | None = None) -> Dict[str, Any]: + compact = { + "id": response_obj.get("id"), + "object": response_obj.get("object") or "response", + "created_at": response_obj.get("created_at"), + "status": response_obj.get("status") or "completed", + "output": response_obj.get("output") if isinstance(response_obj.get("output"), list) else [], + "model": response_obj.get("model") if isinstance(response_obj.get("model"), str) else model, + } + if not isinstance(compact["id"], str) or not compact["id"]: + compact["id"] = "resp" + if not isinstance(compact["created_at"], int): + compact["created_at"] = 0 
+ return {k: v for k, v in compact.items() if v is not None} + + +def response_object_from_events(events: List[Dict[str, Any]], model: str | None = None) -> Dict[str, Any] | None: + response_obj: Dict[str, Any] | None = None + text_parts: List[str] = [] + done_items: List[tuple[int, Dict[str, Any]]] = [] + for evt in events: + response = evt.get("response") + if isinstance(response, dict): + response_obj = response + kind = evt.get("type") + if kind == "response.output_text.delta" and isinstance(evt.get("delta"), str): + text_parts.append(evt["delta"]) + elif kind == "response.output_item.done" and isinstance(evt.get("item"), dict): + index = evt.get("output_index") + done_items.append((index if isinstance(index, int) else len(done_items), evt["item"])) + if response_obj is None: + return None + compact = compact_response_object(response_obj, model) + if not compact.get("output"): + if done_items: + compact["output"] = [item for _, item in sorted(done_items, key=lambda item: item[0])] + elif text_parts: + compact["output"] = [ + { + "id": f"{compact['id']}_msg", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "".join(text_parts), + "annotations": [], + } + ], + } + ] + return compact + + def aggregate_response_from_sse( upstream: Any, *, on_event: Any | None = None, + model: str | None = None, ) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]: - response_obj: Dict[str, Any] | None = None + events: List[Dict[str, Any]] = [] error_obj: Dict[str, Any] | None = None try: for evt in iter_sse_event_payloads(upstream): + events.append(evt) if callable(on_event): try: on_event(evt) except Exception: pass response = evt.get("response") - if isinstance(response, dict): - response_obj = response kind = evt.get("type") if kind == "response.failed": if isinstance(response, dict) and isinstance(response.get("error"), dict): @@ -204,7 +265,7 @@ def aggregate_response_from_sse( break finally: upstream.close() - return 
response_obj, error_obj + return response_object_from_events(events, model), error_obj def stream_upstream_bytes( diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index 5da18d0..7489c3a 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -7,7 +7,6 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context -from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS from .fast_mode import resolve_service_tier from .limits import record_rate_limits_from_response from .http import build_cors_headers @@ -72,7 +71,7 @@ def ollama_version() -> Response: return resp -def _instructions_for_model(model: str) -> str: +def _instructions_for_model(model: str) -> str | None: return instructions_for_model(current_app.config, model) @@ -308,7 +307,7 @@ def ollama_chat() -> Response: upstream2, err2 = start_upstream_request( normalize_model_name(model, current_app.config.get("DEBUG_MODEL")), input_items, - instructions=BASE_INSTRUCTIONS, + instructions=_instructions_for_model(model), tools=base_tools_only, tool_choice=safe_choice, parallel_tool_calls=parallel_tool_calls, diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index eb37842..088e0ac 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -6,14 +6,14 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, request -from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS from .fast_mode import resolve_service_tier from .limits import record_rate_limits_from_response -from .http import build_cors_headers +from .http import build_cors_headers, openai_error_payload from .model_registry import list_public_models, uses_codex_instructions from .responses_api import ( ResponsesRequestError, aggregate_response_from_sse, + compact_response_object, extract_client_session_id, instructions_for_model, normalize_responses_payload, @@ -35,6 +35,7 @@ from .utils import ( 
convert_chat_messages_to_responses_input, convert_tools_chat_to_responses, + normalize_tool_choice_for_responses, sse_translate_chat, sse_translate_text, ) @@ -73,7 +74,7 @@ def _gen(): return _gen() -def _instructions_for_model(model: str) -> str: +def _instructions_for_model(model: str) -> str | None: return instructions_for_model(current_app.config, model) @@ -92,7 +93,7 @@ def _service_tier_from_payload( if resolution.warning_message and verbose: print(f"[FastMode] {resolution.warning_message}") if resolution.error_message: - err = {"error": {"message": resolution.error_message}} + err = openai_error_payload(resolution.error_message, code="unsupported_service_tier") if verbose: _log_json("OUT POST service_tier resolution", err) resp = make_response(jsonify(err), 400) @@ -122,7 +123,7 @@ def chat_completions() -> Response: try: payload = json.loads(raw.replace("\r", "").replace("\n", "")) except Exception: - err = {"error": {"message": "Invalid JSON body"}} + err = openai_error_payload("Invalid JSON body") if verbose: _log_json("OUT POST /v1/chat/completions", err) return jsonify(err), 400 @@ -137,7 +138,7 @@ def chat_completions() -> Response: if messages is None: messages = [] if not isinstance(messages, list): - err = {"error": {"message": "Request must include messages: []"}} + err = openai_error_payload("Request must include messages: []", param="messages") if verbose: _log_json("OUT POST /v1/chat/completions", err) return jsonify(err), 400 @@ -153,7 +154,7 @@ def chat_completions() -> Response: include_usage = bool(stream_options.get("include_usage", False)) tools_responses = convert_tools_chat_to_responses(payload.get("tools")) - tool_choice = payload.get("tool_choice", "auto") + tool_choice = normalize_tool_choice_for_responses(payload.get("tool_choice", "auto")) parallel_tool_calls = bool(payload.get("parallel_tool_calls", False)) responses_tools_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else [] 
extra_tools: List[Dict[str, Any]] = [] @@ -164,10 +165,11 @@ def chat_completions() -> Response: continue if _t.get("type") not in ("web_search", "web_search_preview"): err = { - "error": { - "message": "Only web_search/web_search_preview are supported in responses_tools", - "code": "RESPONSES_TOOL_UNSUPPORTED", - } + **openai_error_payload( + "Only web_search/web_search_preview are supported in responses_tools", + param="responses_tools", + code="RESPONSES_TOOL_UNSUPPORTED", + ) } if verbose: _log_json("OUT POST /v1/chat/completions", err) @@ -187,7 +189,11 @@ def chat_completions() -> Response: except Exception: size = 0 if size > MAX_TOOLS_BYTES: - err = {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}} + err = openai_error_payload( + "responses_tools too large", + param="responses_tools", + code="RESPONSES_TOOLS_TOO_LARGE", + ) if verbose: _log_json("OUT POST /v1/chat/completions", err) return jsonify(err), 400 @@ -253,11 +259,11 @@ def chat_completions() -> Response: if verbose: print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)") base_tools_only = convert_tools_chat_to_responses(payload.get("tools")) - safe_choice = payload.get("tool_choice", "auto") + safe_choice = normalize_tool_choice_for_responses(payload.get("tool_choice", "auto")) upstream2, err2 = start_upstream_request( model, input_items, - instructions=BASE_INSTRUCTIONS, + instructions=_instructions_for_model(model), tools=base_tools_only, tool_choice=safe_choice, parallel_tool_calls=parallel_tool_calls, @@ -269,10 +275,11 @@ def chat_completions() -> Response: upstream = upstream2 else: err = { - "error": { - "message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), - "code": "RESPONSES_TOOLS_REJECTED", - } + **openai_error_payload( + (err_body.get("error", {}) or {}).get("message", "Upstream error"), + error_type="api_error", + code="RESPONSES_TOOLS_REJECTED", + ) } if verbose: _log_json("OUT POST 
/v1/chat/completions", err) @@ -280,7 +287,10 @@ def chat_completions() -> Response: else: if verbose: print("Upstream error status=", upstream.status_code) - err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}} + err = openai_error_payload( + (err_body.get("error", {}) or {}).get("message", "Upstream error"), + error_type="api_error", + ) if verbose: _log_json("OUT POST /v1/chat/completions", err) return jsonify(err), upstream.status_code @@ -377,7 +387,7 @@ def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None: upstream.close() if error_message: - resp = make_response(jsonify({"error": {"message": error_message}}), 502) + resp = make_response(jsonify(openai_error_payload(error_message, error_type="api_error")), 502) for k, v in build_cors_headers().items(): resp.headers.setdefault(k, v) return resp @@ -424,7 +434,7 @@ def completions() -> Response: try: payload = json.loads(raw) if raw else {} except Exception: - err = {"error": {"message": "Invalid JSON body"}} + err = openai_error_payload("Invalid JSON body") if verbose: _log_json("OUT POST /v1/completions", err) return jsonify(err), 400 @@ -483,7 +493,10 @@ def completions() -> Response: err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text} except Exception: err_body = {"raw": upstream.text} - err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}} + err = openai_error_payload( + (err_body.get("error", {}) or {}).get("message", "Upstream error"), + error_type="api_error", + ) if verbose: _log_json("OUT POST /v1/completions", err) return jsonify(err), upstream.status_code @@ -584,13 +597,13 @@ def responses_create() -> Response: try: payload = json.loads(raw) if raw else {} except Exception: - err = {"error": {"message": "Invalid JSON body"}} + err = openai_error_payload("Invalid JSON body") if verbose: _log_json("OUT POST /v1/responses", err) return 
jsonify(err), 400 if not isinstance(payload, dict): - err = {"error": {"message": "Request body must be a JSON object"}} + err = openai_error_payload("Request body must be a JSON object") if verbose: _log_json("OUT POST /v1/responses", err) return jsonify(err), 400 @@ -602,9 +615,7 @@ def responses_create() -> Response: client_session_id=extract_client_session_id(request.headers), ) except ResponsesRequestError as exc: - err: Dict[str, Any] = {"error": {"message": str(exc)}} - if exc.code: - err["error"]["code"] = exc.code + err: Dict[str, Any] = openai_error_payload(str(exc), code=exc.code) if verbose: _log_json("OUT POST /v1/responses", err) return jsonify(err), exc.status_code @@ -688,6 +699,7 @@ def responses_create() -> Response: upstream.close() if isinstance(body, dict): note_responses_final_response(normalized.session_id, body) + body = compact_response_object(body, normalized.requested_model or normalized.normalized_model) if verbose: _log_json("OUT POST /v1/responses", body) resp = make_response(jsonify(body), upstream.status_code) @@ -698,6 +710,7 @@ def responses_create() -> Response: response_obj, error_obj = aggregate_response_from_sse( upstream, on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt), + model=normalized.requested_model or normalized.normalized_model, ) if error_obj is not None: clear_responses_reuse_state(normalized.session_id) @@ -710,7 +723,10 @@ def responses_create() -> Response: if response_obj is None: clear_responses_reuse_state(normalized.session_id) - err = {"error": {"message": "Upstream response stream did not contain a completed response object"}} + err = openai_error_payload( + "Upstream response stream did not contain a completed response object", + error_type="api_error", + ) if verbose: _log_json("OUT POST /v1/responses", err) resp = make_response(jsonify(err), 502) diff --git a/chatmock/upstream.py b/chatmock/upstream.py index 48ec842..9e29f7f 100644 --- a/chatmock/upstream.py +++ 
b/chatmock/upstream.py @@ -69,7 +69,6 @@ def start_upstream_request( responses_payload = { "model": model, - "instructions": instructions if isinstance(instructions, str) and instructions.strip() else instructions, "input": input_items, "tools": tools or [], - "tool_choice": tool_choice if tool_choice in ("auto", "none") or isinstance(tool_choice, dict) else "auto", + "tool_choice": tool_choice if tool_choice in ("auto", "none", "required") or isinstance(tool_choice, dict) else "auto", # keep in sync with normalize_tool_choice_for_responses @@ -78,6 +77,8 @@ def start_upstream_request( "stream": True, "prompt_cache_key": session_id, } + if instructions is not None: + responses_payload["instructions"] = instructions if include: responses_payload["include"] = include diff --git a/chatmock/utils.py b/chatmock/utils.py index c04b997..14e0ace 100644 --- a/chatmock/utils.py +++ b/chatmock/utils.py @@ -247,6 +247,21 @@ def convert_tools_chat_to_responses(tools: Any) -> List[Dict[str, Any]]: return out +def normalize_tool_choice_for_responses(tool_choice: Any) -> Any: + if isinstance(tool_choice, str): + return tool_choice if tool_choice in ("auto", "none", "required") else "auto" + if not isinstance(tool_choice, dict): + return "auto" + if tool_choice.get("type") != "function": + return tool_choice + if isinstance(tool_choice.get("name"), str) and tool_choice.get("name"): + return {"type": "function", "name": tool_choice["name"]} + function = tool_choice.get("function") + if isinstance(function, dict) and isinstance(function.get("name"), str) and function.get("name"): + return {"type": "function", "name": function["name"]} + return tool_choice + + def load_chatgpt_tokens( ensure_fresh: bool = True, *, diff --git a/tests/test_routes.py b/tests/test_routes.py index c5d94bc..a7c2cbc 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -63,6 +63,19 @@ def test_openai_models_list(self) -> None: self.assertIn("gpt-5.4-mini", model_ids) self.assertIn("gpt-5.3-codex-spark", model_ids) + def test_openai_errors_include_standard_fields(self) -> None: + response = self.client.post( + "/v1/chat/completions", + data="{bad json", + 
content_type="application/json", + ) + body = response.get_json() + self.assertEqual(response.status_code, 400) + self.assertEqual(body["error"]["message"], "Invalid JSON body") + self.assertEqual(body["error"]["type"], "invalid_request_error") + self.assertIn("param", body["error"]) + self.assertIn("code", body["error"]) + def test_ollama_tags_list(self) -> None: response = self.client.get("/api/tags") body = response.get_json() @@ -214,6 +227,41 @@ def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_star self.assertIn("Fast mode is not supported", body["error"]["message"]) mock_start.assert_not_called() + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_normalizes_official_function_tool_choice(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + "tool_choice": {"type": "function", "function": {"name": "get_weather"}}, + }, + ) + self.assertEqual(response.status_code, 200) + self.assertEqual( + mock_start.call_args.kwargs["tool_choice"], + {"type": "function", "name": "get_weather"}, + ) + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_returns_completed_response_object(self, mock_start) -> None: mock_start.return_value = ( @@ -247,6 +295,7 @@ def test_responses_route_returns_completed_response_object(self, mock_start) -> outbound_payload = mock_start.call_args.args[0] self.assertEqual(outbound_payload["model"], "gpt-5.4-mini") self.assertEqual(outbound_payload["store"], False) + 
self.assertIn("instructions", outbound_payload) self.assertEqual( outbound_payload["input"], [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}], @@ -254,6 +303,174 @@ def test_responses_route_returns_completed_response_object(self, mock_start) -> self.assertEqual(outbound_payload["reasoning"]["effort"], "medium") self.assertIsInstance(outbound_payload["prompt_cache_key"], str) + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_can_skip_default_base_instructions(self, mock_start) -> None: + app = create_app(no_base_instructions=True) + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.completed", + "response": {"id": "resp_no_base", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello"}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertNotIn("instructions", outbound_payload) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_preserves_explicit_instructions_when_base_disabled(self, mock_start) -> None: + app = create_app(no_base_instructions=True) + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.completed", + "response": {"id": "resp_explicit", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = client.post( + "/v1/responses", + json={"model": "gpt-5.4", "instructions": "custom", "input": "hello"}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["instructions"], "custom") + + 
@patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_compacts_completed_response_object(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.output_text.delta", + "delta": "hello", + }, + { + "type": "response.completed", + "response": { + "id": "resp_compact", + "object": "response", + "created_at": 123, + "status": "completed", + "model": "gpt-5.4", + "instructions": "internal prompt", + "parallel_tool_calls": True, + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello"}, + ) + body = response.get_json() + self.assertEqual(response.status_code, 200) + self.assertEqual(body["id"], "resp_compact") + self.assertNotIn("instructions", body) + self.assertNotIn("parallel_tool_calls", body) + self.assertEqual(body["output"][0]["content"][0]["text"], "hello") + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_preserves_reasoning_summary_and_output(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.completed", + "response": { + "id": "resp_summary", + "object": "response", + "created_at": 123, + "status": "completed", + "model": "gpt-5.4", + "instructions": "internal prompt", + "output": [ + { + "type": "reasoning", + "id": "rs_1", + "summary": [{"type": "summary_text", "text": "reasoning summary"}], + "encrypted_content": "encrypted", + "content": [], + }, + { + "type": "message", + "id": "msg_1", + "role": "assistant", + "content": [{"type": "output_text", "text": "final output"}], + }, + ], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "input": "hello", + "reasoning": {"effort": "medium", "summary": "detailed"}, + }, + ) + body = response.get_json() + 
self.assertEqual(response.status_code, 200) + self.assertNotIn("instructions", body) + self.assertEqual(body["output"][0]["summary"][0]["text"], "reasoning summary") + self.assertEqual(body["output"][1]["content"][0]["text"], "final output") + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["reasoning"]["summary"], "detailed") + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_normalizes_official_function_tool_choice(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.completed", + "response": {"id": "resp_tool_choice", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "input": "hello", + "tool_choice": {"type": "function", "function": {"name": "get_weather"}}, + }, + ) + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["tool_choice"], {"type": "function", "name": "get_weather"}) + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") @@ -530,7 +747,10 @@ def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> Non @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_stream_passthrough(self, mock_start) -> None: - chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n' + chunk = ( + b'data: {"type":"response.reasoning_summary_text.delta","delta":"reasoning summary"}\n\n' + b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n' + ) mock_start.return_value = ( FakeUpstream( headers={"Content-Type": "text/event-stream"}, @@ -543,7 +763,11 @@ def test_responses_route_stream_passthrough(self, 
mock_start) -> None: json={"model": "gpt-5.4", "input": "hello", "stream": True}, ) self.assertEqual(response.status_code, 200) - self.assertIn("response.output_text.delta", response.get_data(as_text=True)) + text = response.get_data(as_text=True) + self.assertIn("response.reasoning_summary_text.delta", text) + self.assertIn("reasoning summary", text) + self.assertIn("response.output_text.delta", text) + self.assertIn("hello", text) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None: