@@ -5,6 +5,8 @@
 	"encoding/json"
 	"errors"
 	"net/http"
+
+	"github.com/meguminnnnnnnnn/go-openai/jsonschema"
 )
 
 // Chat message role defined by the OpenAI API.
@@ -234,13 +236,49 @@ type ChatCompletionResponseFormatJSONSchema struct {
 	Strict bool `json:"strict"`
 }
 
+func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) error {
+	type rawJSONSchema struct {
+		Name        string          `json:"name"`
+		Description string          `json:"description,omitempty"`
+		Schema      json.RawMessage `json:"schema"`
+		Strict      bool            `json:"strict"`
+	}
+	var raw rawJSONSchema
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+	r.Name = raw.Name
+	r.Description = raw.Description
+	r.Strict = raw.Strict
+	if len(raw.Schema) > 0 && string(raw.Schema) != "null" {
+		var d jsonschema.Definition
+		err := json.Unmarshal(raw.Schema, &d)
+		if err != nil {
+			return err
+		}
+		r.Schema = &d
+	}
+	return nil
+}
+
+// ChatCompletionRequestExtensions contains third-party OpenAI API extensions
+// (e.g., vendor-specific implementations like vLLM).
+type ChatCompletionRequestExtensions struct {
+	// GuidedChoice is a vLLM-specific extension that restricts the model's output
+	// to one of the predefined string choices provided in this field. This feature
+	// is used to constrain the model's responses to a controlled set of options,
+	// ensuring predictable and consistent outputs in scenarios where specific
+	// choices are required.
+	GuidedChoice []string `json:"guided_choice,omitempty"`
+}
+
 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
 	Model    string                  `json:"model"`
 	Messages []ChatCompletionMessage `json:"messages"`
 	// MaxTokens The maximum number of tokens that can be generated in the chat completion.
 	// This value can be used to control costs for text generated via API.
-	// This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
+	// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
 	MaxTokens int `json:"max_tokens,omitempty"`
 	// MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
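For clarity, a minimal sketch of how the new unmarshaler behaves. It assumes the module's root import path is `github.com/meguminnnnnnnnn/go-openai` (matching the `jsonschema` subpackage imported above) and that `Schema` is a `*jsonschema.Definition`, as implied by `r.Schema = &d`:

```go
package main

import (
	"encoding/json"
	"fmt"

	openai "github.com/meguminnnnnnnnn/go-openai"
)

func main() {
	// A response_format payload as a server might return it. "schema" is
	// decoded into a jsonschema.Definition; a missing or null "schema"
	// leaves the Schema field nil.
	data := []byte(`{
		"name": "weather",
		"schema": {"type": "object", "properties": {"city": {"type": "string"}}},
		"strict": true
	}`)

	var f openai.ChatCompletionResponseFormatJSONSchema
	if err := json.Unmarshal(data, &f); err != nil {
		panic(err)
	}
	fmt.Println(f.Name, f.Strict, f.Schema != nil) // weather true true
}
```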
@@ -286,7 +324,15 @@ type ChatCompletionRequest struct {
 	ReasoningEffort string `json:"reasoning_effort,omitempty"`
 	// Metadata to store with the completion.
 	Metadata map[string]string `json:"metadata,omitempty"`
-
+	// Prediction holds configuration for a predicted output.
+	Prediction *Prediction `json:"prediction,omitempty"`
+	// ChatTemplateKwargs passes non-standard parameters in the request body:
+	// additional kwargs handed to the template renderer and accessible from the
+	// chat template. For example, Qwen3's thinking mode can be toggled with
+	// "chat_template_kwargs": {"enable_thinking": false}; see
+	// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
+	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
+	// ServiceTier specifies the latency tier to use for processing the request.
+	ServiceTier ServiceTier `json:"service_tier,omitempty"`
 	// Extra fields to be sent in the request.
 	// Useful for experimental features not yet officially supported.
 	extraFields map[string]any
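A hedged usage sketch for the new request fields. The model name and message content are placeholders, and `ChatMessageRoleUser` is assumed from the role constants this file defines:

```go
// Hypothetical request against a vLLM-served Qwen3 model.
req := openai.ChatCompletionRequest{
	Model: "Qwen/Qwen3-8B", // placeholder model name
	Messages: []openai.ChatCompletionMessage{
		{Role: openai.ChatMessageRoleUser, Content: "Summarize this in one line."},
	},
	// Forwarded to the chat template renderer: disables Qwen3's thinking
	// mode, per the Qwen/vLLM docs linked above.
	ChatTemplateKwargs: map[string]any{"enable_thinking": false},
	// Latency tier for processing; see the ServiceTier constants added below.
	ServiceTier: openai.ServiceTierDefault,
}
```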
@@ -386,6 +432,15 @@ const (
 	FinishReasonNull FinishReason = "null"
 )
 
+type ServiceTier string
+
+const (
+	ServiceTierAuto     ServiceTier = "auto"
+	ServiceTierDefault  ServiceTier = "default"
+	ServiceTierFlex     ServiceTier = "flex"
+	ServiceTierPriority ServiceTier = "priority"
+)
+
 func (r FinishReason) MarshalJSON() ([]byte, error) {
 	if r == FinishReasonNull || r == "" {
 		return []byte("null"), nil
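As context for the constants above, a small sketch of how both types encode (reusing the `openai` alias from the earlier examples). `ServiceTier` has no custom marshaler, while `FinishReason` does:

```go
// ServiceTier values are plain strings, so they marshal as JSON strings.
b, _ := json.Marshal(openai.ServiceTierPriority)
fmt.Println(string(b)) // "priority"

// FinishReason's custom marshaler (shown above) serializes both the
// "null" sentinel and the empty value as JSON null, not as a string.
b, _ = json.Marshal(openai.FinishReasonNull)
fmt.Println(string(b)) // null
```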
@@ -418,6 +473,7 @@ type ChatCompletionResponse struct {
 	Usage               Usage                `json:"usage"`
 	SystemFingerprint   string               `json:"system_fingerprint"`
 	PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
+	ServiceTier         ServiceTier          `json:"service_tier,omitempty"`
 
 	httpHeader
 }
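Finally, reading the new response field back. Client construction and error handling are elided; `CreateChatCompletion` is assumed to be the library's standard chat call, as in upstream go-openai:

```go
// Sketch: echo the tier the server actually used, when it reports one.
resp, err := client.CreateChatCompletion(context.Background(), req)
if err != nil {
	log.Fatal(err)
}
if resp.ServiceTier != "" {
	fmt.Println("served at tier:", resp.ServiceTier)
}
```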