Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ All flags go after `chatmock serve`. These can also be set as environment variab
| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags | think-tags | How reasoning is returned to the client |
| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
| `--no-base-instructions` | `CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS` | true/false | false | Do not inject ChatMock's default Codex instructions |
| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |

<details>
Expand Down
2 changes: 2 additions & 0 deletions chatmock/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def create_app(
debug_model: str | None = None,
expose_reasoning_models: bool = False,
default_web_search: bool = False,
no_base_instructions: bool = False,
) -> Flask:
app = Flask(__name__)

Expand All @@ -35,6 +36,7 @@ def create_app(
GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
DEFAULT_WEB_SEARCH=bool(default_web_search),
NO_BASE_INSTRUCTIONS=bool(no_base_instructions),
)

@app.get("/")
Expand Down
12 changes: 12 additions & 0 deletions chatmock/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ def cmd_serve(
debug_model: str | None,
expose_reasoning_models: bool,
default_web_search: bool,
no_base_instructions: bool,
) -> int:
app = create_app(
verbose=verbose,
Expand All @@ -244,6 +245,7 @@ def cmd_serve(
debug_model=debug_model,
expose_reasoning_models=expose_reasoning_models,
default_web_search=default_web_search,
no_base_instructions=no_base_instructions,
)

app.run(host=host, use_reloader=False, port=port, threaded=True)
Expand Down Expand Up @@ -319,6 +321,15 @@ def main() -> None:
"Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH."
),
)
p_serve.add_argument(
"--no-base-instructions",
action="store_true",
default=(os.getenv("CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS") or "").strip().lower() in ("1", "true", "yes", "on"),
help=(
"Do not inject ChatMock's default Codex instructions when a request omits instructions. "
"Also configurable via CHATGPT_LOCAL_NO_BASE_INSTRUCTIONS."
),
)

p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
Expand All @@ -341,6 +352,7 @@ def main() -> None:
debug_model=args.debug_model,
expose_reasoning_models=args.expose_reasoning_models,
default_web_search=args.enable_web_search,
no_base_instructions=args.no_base_instructions,
)
)
elif args.command == "info":
Expand Down
20 changes: 19 additions & 1 deletion chatmock/http.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from typing import Any

from flask import Response, jsonify, request


Expand All @@ -16,9 +18,25 @@ def build_cors_headers() -> dict:


def json_error(message: str, status: int = 400) -> Response:
    """Build a JSON error response in the OpenAI error envelope.

    Args:
        message: Human-readable error description placed in the payload.
        status: HTTP status code for the response (defaults to 400).

    Returns:
        A ``Response`` with mimetype ``application/json`` whose body is the
        serialized ``openai_error_payload(message)`` and which carries the
        headers produced by ``build_cors_headers()``.
    """
    # Serialize exactly once; an earlier assignment that built a bare
    # {"error": {"message": ...}} body was overwritten before use.
    resp = jsonify(openai_error_payload(message))
    response: Response = Response(response=resp.response, status=status, mimetype="application/json")
    # setdefault keeps any header already present on the response.
    for k, v in build_cors_headers().items():
        response.headers.setdefault(k, v)
    return response


def openai_error_payload(
message: str,
*,
error_type: str = "invalid_request_error",
param: str | None = None,
code: str | None = None,
) -> dict[str, Any]:
return {
"error": {
"message": message,
"type": error_type,
"param": param,
"code": code,
}
}
75 changes: 68 additions & 7 deletions chatmock/responses_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
)
from .reasoning import build_reasoning_param
from .session import ensure_session_id
from .utils import normalize_tool_choice_for_responses


@dataclass(frozen=True)
Expand All @@ -35,7 +36,9 @@ class NormalizedResponsesRequest:
service_tier_resolution: ServiceTierResolution


def instructions_for_model(config: Dict[str, Any], model: str) -> str:
def instructions_for_model(config: Dict[str, Any], model: str) -> str | None:
if bool(config.get("NO_BASE_INSTRUCTIONS")):
return None
base = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
if uses_codex_instructions(model):
codex = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
Expand Down Expand Up @@ -98,8 +101,11 @@ def normalize_responses_payload(

instructions = normalized.get("instructions")
if not isinstance(instructions, str) or not instructions.strip():
instructions = instructions_for_model(config, normalized_model)
normalized["instructions"] = instructions
if not bool(config.get("NO_BASE_INSTRUCTIONS")):
instructions = instructions_for_model(config, normalized_model)
normalized["instructions"] = instructions
else:
instructions = None

reasoning_effort = config.get("REASONING_EFFORT", "medium")
reasoning_summary = config.get("REASONING_SUMMARY", "auto")
Expand All @@ -120,6 +126,7 @@ def normalize_responses_payload(
if "reasoning.encrypted_content" not in include_list:
include_list.append("reasoning.encrypted_content")
normalized["include"] = include_list
normalized["tool_choice"] = normalize_tool_choice_for_responses(normalized.get("tool_choice", "auto"))

tools = normalized.get("tools")
if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")):
Expand Down Expand Up @@ -176,23 +183,77 @@ def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
yield evt


def compact_response_object(response_obj: Dict[str, Any], model: str | None = None) -> Dict[str, Any]:
compact = {
"id": response_obj.get("id"),
"object": response_obj.get("object") or "response",
"created_at": response_obj.get("created_at"),
"status": response_obj.get("status") or "completed",
"output": response_obj.get("output") if isinstance(response_obj.get("output"), list) else [],
"model": response_obj.get("model") if isinstance(response_obj.get("model"), str) else model,
}
if not isinstance(compact["id"], str) or not compact["id"]:
compact["id"] = "resp"
if not isinstance(compact["created_at"], int):
compact["created_at"] = 0
return {k: v for k, v in compact.items() if v is not None}


def response_object_from_events(events: List[Dict[str, Any]], model: str | None = None) -> Dict[str, Any] | None:
    """Rebuild a compact response object from a list of decoded SSE events.

    The last event carrying a dict under ``"response"`` supplies the base
    object, which is passed through ``compact_response_object``. When that
    object has no output, the output is filled from completed output items
    (ordered by their output index) or, failing that, from the concatenated
    text deltas wrapped in a single assistant message.

    Args:
        events: Event payload dicts in arrival order.
        model: Fallback model name forwarded to ``compact_response_object``.

    Returns:
        The compact response dict, or ``None`` when no event contained a
        response object.
    """
    latest: Dict[str, Any] | None = None
    deltas: List[str] = []
    finished: List[tuple[int, Dict[str, Any]]] = []

    for event in events:
        candidate = event.get("response")
        if isinstance(candidate, dict):
            latest = candidate

        event_type = event.get("type")
        if event_type == "response.output_text.delta":
            delta = event.get("delta")
            if isinstance(delta, str):
                deltas.append(delta)
        elif event_type == "response.output_item.done":
            item = event.get("item")
            if isinstance(item, dict):
                position = event.get("output_index")
                if not isinstance(position, int):
                    # No usable index: fall back to arrival order.
                    position = len(finished)
                finished.append((position, item))

    if latest is None:
        return None

    compact = compact_response_object(latest, model)
    if compact.get("output"):
        return compact

    if finished:
        ordered = sorted(finished, key=lambda pair: pair[0])
        compact["output"] = [entry for _, entry in ordered]
    elif deltas:
        text_content = {
            "type": "output_text",
            "text": "".join(deltas),
            "annotations": [],
        }
        message = {
            "id": f"{compact['id']}_msg",
            "type": "message",
            "role": "assistant",
            "content": [text_content],
        }
        compact["output"] = [message]
    return compact


def aggregate_response_from_sse(
upstream: Any,
*,
on_event: Any | None = None,
model: str | None = None,
) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
response_obj: Dict[str, Any] | None = None
events: List[Dict[str, Any]] = []
error_obj: Dict[str, Any] | None = None
try:
for evt in iter_sse_event_payloads(upstream):
events.append(evt)
if callable(on_event):
try:
on_event(evt)
except Exception:
pass
response = evt.get("response")
if isinstance(response, dict):
response_obj = response
kind = evt.get("type")
if kind == "response.failed":
if isinstance(response, dict) and isinstance(response.get("error"), dict):
Expand All @@ -204,7 +265,7 @@ def aggregate_response_from_sse(
break
finally:
upstream.close()
return response_obj, error_obj
return response_object_from_events(events, model), error_obj


def stream_upstream_bytes(
Expand Down
5 changes: 2 additions & 3 deletions chatmock/routes_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context

from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
from .fast_mode import resolve_service_tier
from .limits import record_rate_limits_from_response
from .http import build_cors_headers
Expand Down Expand Up @@ -72,7 +71,7 @@ def ollama_version() -> Response:
return resp


def _instructions_for_model(model: str) -> str | None:
    """Return the instructions for *model* based on the current app config.

    Thin wrapper that passes ``current_app.config`` to
    ``instructions_for_model``. The result may be ``None``, so callers must
    not assume a string.
    """
    return instructions_for_model(current_app.config, model)


Expand Down Expand Up @@ -308,7 +307,7 @@ def ollama_chat() -> Response:
upstream2, err2 = start_upstream_request(
normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
input_items,
instructions=BASE_INSTRUCTIONS,
instructions=_instructions_for_model(model),
tools=base_tools_only,
tool_choice=safe_choice,
parallel_tool_calls=parallel_tool_calls,
Expand Down
Loading