RayBytes · robberphex · Jun 23, 2026 · Jun 23, 2026
diff --git a/README.md b/README.md
@@ -119,6 +119,7 @@ All flags go after `chatmock serve`. These can also be set as environment variab
 | `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags | think-tags | How reasoning is returned to the client |
 | `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
 | `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--no-base-instructions` | `CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS` | true/false | false | Do not inject ChatMock's default Codex instructions |
 | `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
 
 <details>

diff --git a/chatmock/app.py b/chatmock/app.py
@@ -20,6 +20,7 @@ def create_app(
     debug_model: str | None = None,
     expose_reasoning_models: bool = False,
     default_web_search: bool = False,
+    no_base_instructions: bool = False,
 ) -> Flask:
     app = Flask(__name__)
 
@@ -35,6 +36,7 @@ def create_app(
         GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
         EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
         DEFAULT_WEB_SEARCH=bool(default_web_search),
+        NO_BASE_INSTRUCTIONS=bool(no_base_instructions),
     )
 
     @app.get("/")

diff --git a/chatmock/cli.py b/chatmock/cli.py
@@ -233,6 +233,7 @@ def cmd_serve(
     debug_model: str | None,
     expose_reasoning_models: bool,
     default_web_search: bool,
+    no_base_instructions: bool,
 ) -> int:
     app = create_app(
         verbose=verbose,
@@ -244,6 +245,7 @@ def cmd_serve(
         debug_model=debug_model,
         expose_reasoning_models=expose_reasoning_models,
         default_web_search=default_web_search,
+        no_base_instructions=no_base_instructions,
     )
 
     app.run(host=host, use_reloader=False, port=port, threaded=True)
@@ -319,6 +321,15 @@ def main() -> None:
             "Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH."
         ),
     )
+    p_serve.add_argument(
+        "--no-base-instructions",
+        action="store_true",
+        default=(os.getenv("CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Do not inject ChatMock's default Codex instructions when a request omits instructions. "
+            "Also configurable via CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS."
+        ),
+    )
 
     p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
     p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
@@ -341,6 +352,7 @@ def main() -> None:
                 debug_model=args.debug_model,
                 expose_reasoning_models=args.expose_reasoning_models,
                 default_web_search=args.enable_web_search,
+                no_base_instructions=args.no_base_instructions,
             )
         )
     elif args.command == "info":

diff --git a/chatmock/http.py b/chatmock/http.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import Any
+
 from flask import Response, jsonify, request
 
 
@@ -16,9 +18,25 @@ def build_cors_headers() -> dict:
 
 
 def json_error(message: str, status: int = 400) -> Response:
-    resp = jsonify({"error": {"message": message}})
+    resp = jsonify(openai_error_payload(message))  # body carries "message", "type", "param" and "code" under "error"
     response: Response = Response(response=resp.response, status=status, mimetype="application/json")
     for k, v in build_cors_headers().items():
         response.headers.setdefault(k, v)
     return response
 
+
+def openai_error_payload(  # Build the {"error": {...}} dict that json_error passes to jsonify.
+    message: str,  # human-readable description, stored under "message"
+    *,  # every parameter after this marker is keyword-only
+    error_type: str = "invalid_request_error",  # stored under "type"
+    param: str | None = None,  # stored as-is under "param"; None is kept, not dropped
+    code: str | None = None,  # stored as-is under "code"; None is kept, not dropped
+) -> dict[str, Any]:
+    return {
+        "error": {  # single top-level key wrapping all four error fields
+            "message": message,
+            "type": error_type,
+            "param": param,
+            "code": code,
+        }
+    }
diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py
@@ -14,6 +14,7 @@
 )
 from .reasoning import build_reasoning_param
 from .session import ensure_session_id
+from .utils import normalize_tool_choice_for_responses
 
 
 @dataclass(frozen=True)
@@ -35,7 +36,9 @@ class NormalizedResponsesRequest:
     service_tier_resolution: ServiceTierResolution
 
 
-def instructions_for_model(config: Dict[str, Any], model: str) -> str:
+def instructions_for_model(config: Dict[str, Any], model: str) -> str | None:
+    if bool(config.get("NO_BASE_INSTRUCTIONS")):
+        return None
     base = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
     if uses_codex_instructions(model):
         codex = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
@@ -98,8 +101,11 @@ def normalize_responses_payload(
 
     instructions = normalized.get("instructions")
     if not isinstance(instructions, str) or not instructions.strip():
-        instructions = instructions_for_model(config, normalized_model)
-        normalized["instructions"] = instructions
+        if not bool(config.get("NO_BASE_INSTRUCTIONS")):
+            instructions = instructions_for_model(config, normalized_model)
+            normalized["instructions"] = instructions
+        else:
+            instructions = None
 
     reasoning_effort = config.get("REASONING_EFFORT", "medium")
     reasoning_summary = config.get("REASONING_SUMMARY", "auto")
@@ -120,6 +126,7 @@ def normalize_responses_payload(
     if "reasoning.encrypted_content" not in include_list:
         include_list.append("reasoning.encrypted_content")
     normalized["include"] = include_list
+    normalized["tool_choice"] = normalize_tool_choice_for_responses(normalized.get("tool_choice", "auto"))
 
     tools = normalized.get("tools")
     if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")):
@@ -176,23 +183,77 @@ def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
             yield evt
 
 
+def compact_response_object(response_obj: Dict[str, Any], model: str | None = None) -> Dict[str, Any]:  # Reduce a response dict to six known keys, applying fallbacks.
+    compact = {
+        "id": response_obj.get("id"),  # validated below
+        "object": response_obj.get("object") or "response",  # missing or falsy -> "response"
+        "created_at": response_obj.get("created_at"),  # validated below
+        "status": response_obj.get("status") or "completed",  # missing or falsy -> "completed"
+        "output": response_obj.get("output") if isinstance(response_obj.get("output"), list) else [],  # anything but a list -> []
+        "model": response_obj.get("model") if isinstance(response_obj.get("model"), str) else model,  # non-str -> the caller's model argument
+    }
+    if not isinstance(compact["id"], str) or not compact["id"]:  # missing, non-str or empty id
+        compact["id"] = "resp"  # fixed placeholder id
+    if not isinstance(compact["created_at"], int):  # NOTE(review): bool subclasses int, so True/False pass this check
+        compact["created_at"] = 0
+    return {k: v for k, v in compact.items() if v is not None}  # after the fallbacks only "model" can still be None, so only it can be dropped
+
+
+def response_object_from_events(events: List[Dict[str, Any]], model: str | None = None) -> Dict[str, Any] | None:  # Fold event payloads into one compact response dict; None if no event carried one.
+    response_obj: Dict[str, Any] | None = None  # last dict seen under an event's "response" key
+    text_parts: List[str] = []  # string deltas from response.output_text.delta events, in arrival order
+    done_items: List[tuple[int, Dict[str, Any]]] = []  # (sort index, item) pairs from response.output_item.done events
+    for evt in events:
+        response = evt.get("response")
+        if isinstance(response, dict):
+            response_obj = response  # later events overwrite earlier ones
+        kind = evt.get("type")
+        if kind == "response.output_text.delta" and isinstance(evt.get("delta"), str):
+            text_parts.append(evt["delta"])
+        elif kind == "response.output_item.done" and isinstance(evt.get("item"), dict):
+            index = evt.get("output_index")
+            done_items.append((index if isinstance(index, int) else len(done_items), evt["item"]))  # non-int output_index falls back to the count of items collected so far
+    if response_obj is None:  # no event had a dict under "response"
+        return None
+    compact = compact_response_object(response_obj, model)
+    if not compact.get("output"):  # rebuild output only when the response object's own list is empty or was not a list
+        if done_items:  # prefer completed items over raw text deltas
+            compact["output"] = [item for _, item in sorted(done_items, key=lambda item: item[0])]  # ordered by index; sorted() is stable, so ties keep arrival order
+        elif text_parts:  # otherwise synthesize a single assistant message from the deltas
+            compact["output"] = [
+                {
+                    "id": f"{compact['id']}_msg",  # derived from the (possibly placeholder) response id
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [
+                        {
+                            "type": "output_text",
+                            "text": "".join(text_parts),  # deltas concatenated in arrival order
+                            "annotations": [],
+                        }
+                    ],
+                }
+            ]
+    return compact  # output stays [] when neither done items nor text deltas were seen
+
+
 def aggregate_response_from_sse(
     upstream: Any,
     *,
     on_event: Any | None = None,
+    model: str | None = None,
 ) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
-    response_obj: Dict[str, Any] | None = None
+    events: List[Dict[str, Any]] = []
     error_obj: Dict[str, Any] | None = None
     try:
         for evt in iter_sse_event_payloads(upstream):
+            events.append(evt)
             if callable(on_event):
                 try:
                     on_event(evt)
                 except Exception:
                     pass
             response = evt.get("response")
-            if isinstance(response, dict):
-                response_obj = response
             kind = evt.get("type")
             if kind == "response.failed":
                 if isinstance(response, dict) and isinstance(response.get("error"), dict):
@@ -204,7 +265,7 @@ def aggregate_response_from_sse(
                 break
     finally:
         upstream.close()
-    return response_obj, error_obj
+    return response_object_from_events(events, model), error_obj
 
 
 def stream_upstream_bytes(

diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py
@@ -7,7 +7,6 @@
 
 from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
 
-from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .fast_mode import resolve_service_tier
 from .limits import record_rate_limits_from_response
 from .http import build_cors_headers
@@ -72,7 +71,7 @@ def ollama_version() -> Response:
     return resp
 
 
-def _instructions_for_model(model: str) -> str:
+def _instructions_for_model(model: str) -> str | None:  # None when instructions_for_model returns None (config NO_BASE_INSTRUCTIONS is truthy)
     return instructions_for_model(current_app.config, model)
 
 
@@ -308,7 +307,7 @@ def ollama_chat() -> Response:
             upstream2, err2 = start_upstream_request(
                 normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
                 input_items,
-                instructions=BASE_INSTRUCTIONS,
+                instructions=_instructions_for_model(model),
                 tools=base_tools_only,
                 tool_choice=safe_choice,
                 parallel_tool_calls=parallel_tool_calls,