@@ -5,6 +5,8 @@
 	"encoding/json"
 	"errors"
 	"net/http"
+
+	"github.com/meguminnnnnnnnn/go-openai/jsonschema"
 )
 
 // Chat message role defined by the OpenAI API.
@@ -234,13 +236,49 @@ type ChatCompletionResponseFormatJSONSchema struct {
 	Strict bool `json:"strict"`
 }
 
+func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) error {
+	type rawJSONSchema struct {
+		Name        string          `json:"name"`
+		Description string          `json:"description,omitempty"`
+		Schema      json.RawMessage `json:"schema"`
+		Strict      bool            `json:"strict"`
+	}
+	var raw rawJSONSchema
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+	r.Name = raw.Name
+	r.Description = raw.Description
+	r.Strict = raw.Strict
+	if len(raw.Schema) > 0 && string(raw.Schema) != "null" {
+		var d jsonschema.Definition
+		err := json.Unmarshal(raw.Schema, &d)
+		if err != nil {
+			return err
+		}
+		r.Schema = &d
+	}
+	return nil
+}
+
+// ChatCompletionRequestExtensions contains third-party OpenAI API extensions
+// (e.g., vendor-specific implementations like vLLM).
+type ChatCompletionRequestExtensions struct {
+	// GuidedChoice is a vLLM-specific extension that restricts the model's output
+	// to one of the predefined string choices provided in this field. This feature
+	// is used to constrain the model's responses to a controlled set of options,
+	// ensuring predictable and consistent outputs in scenarios where specific
+	// choices are required.
+	GuidedChoice []string `json:"guided_choice,omitempty"`
+}
+
 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
 	Model    string                  `json:"model"`
 	Messages []ChatCompletionMessage `json:"messages"`
 	// MaxTokens The maximum number of tokens that can be generated in the chat completion.
 	// This value can be used to control costs for text generated via API.
-	// This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
+	// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
 	MaxTokens int `json:"max_tokens,omitempty"`
 	// MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
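For clarity, a minimal sketch of how the new unmarshaler behaves. It assumes the module's root import path is `github.com/meguminnnnnnnnn/go-openai` (matching the `jsonschema` subpackage imported above) and that `Schema` is a `*jsonschema.Definition`, as implied by `r.Schema = &d`:

```go
package main

import (
	"encoding/json"
	"fmt"

	openai "github.com/meguminnnnnnnnn/go-openai"
)

func main() {
	// A response_format payload as a server might return it. "schema" is
	// decoded into a jsonschema.Definition; a missing or null "schema"
	// leaves the Schema field nil.
	data := []byte(`{
		"name": "weather",
		"schema": {"type": "object", "properties": {"city": {"type": "string"}}},
		"strict": true
	}`)

	var f openai.ChatCompletionResponseFormatJSONSchema
	if err := json.Unmarshal(data, &f); err != nil {
		panic(err)
	}
	fmt.Println(f.Name, f.Strict, f.Schema != nil) // weather true true
}
```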
@@ -286,7 +324,15 @@ type ChatCompletionRequest struct {
 	ReasoningEffort string `json:"reasoning_effort,omitempty"`
 	// Metadata to store with the completion.
 	Metadata map[string]string `json:"metadata,omitempty"`
-
+	// Prediction holds configuration for a predicted output.
+	Prediction *Prediction `json:"prediction,omitempty"`
+	// ChatTemplateKwargs passes non-standard parameters in the request body:
+	// additional kwargs handed to the template renderer and accessible from the
+	// chat template. For example, Qwen3's thinking mode can be toggled with
+	// "chat_template_kwargs": {"enable_thinking": false}; see
+	// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
+	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
+	// ServiceTier specifies the latency tier to use for processing the request.
+	ServiceTier ServiceTier `json:"service_tier,omitempty"`
 	// Extra fields to be sent in the request.
 	// Useful for experimental features not yet officially supported.
 	extraFields map[string]any
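A hedged usage sketch for the new request fields. The model name and message content are placeholders, and `ChatMessageRoleUser` is assumed from the role constants this file defines:

```go
// Hypothetical request against a vLLM-served Qwen3 model.
req := openai.ChatCompletionRequest{
	Model: "Qwen/Qwen3-8B", // placeholder model name
	Messages: []openai.ChatCompletionMessage{
		{Role: openai.ChatMessageRoleUser, Content: "Summarize this in one line."},
	},
	// Forwarded to the chat template renderer: disables Qwen3's thinking
	// mode, per the Qwen/vLLM docs linked above.
	ChatTemplateKwargs: map[string]any{"enable_thinking": false},
	// Latency tier for processing; see the ServiceTier constants added below.
	ServiceTier: openai.ServiceTierDefault,
}
```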
@@ -386,6 +432,15 @@ const (
 	FinishReasonNull FinishReason = "null"
 )
 
+type ServiceTier string
+
+const (
+	ServiceTierAuto     ServiceTier = "auto"
+	ServiceTierDefault  ServiceTier = "default"
+	ServiceTierFlex     ServiceTier = "flex"
+	ServiceTierPriority ServiceTier = "priority"
+)
+
 func (r FinishReason) MarshalJSON() ([]byte, error) {
 	if r == FinishReasonNull || r == "" {
 		return []byte("null"), nil
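As context for the constants above, a small sketch of how both types encode (reusing the `openai` alias from the earlier examples). `ServiceTier` has no custom marshaler, while `FinishReason` does:

```go
// ServiceTier values are plain strings, so they marshal as JSON strings.
b, _ := json.Marshal(openai.ServiceTierPriority)
fmt.Println(string(b)) // "priority"

// FinishReason's custom marshaler (shown above) serializes both the
// "null" sentinel and the empty value as JSON null, not as a string.
b, _ = json.Marshal(openai.FinishReasonNull)
fmt.Println(string(b)) // null
```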
@@ -418,6 +473,7 @@ type ChatCompletionResponse struct {
 	Usage               Usage                `json:"usage"`
 	SystemFingerprint   string               `json:"system_fingerprint"`
 	PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
+	ServiceTier         ServiceTier          `json:"service_tier,omitempty"`
 
 	httpHeader
 }
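Finally, reading the new response field back. Client construction and error handling are elided; `CreateChatCompletion` is assumed to be the library's standard chat call, as in upstream go-openai:

```go
// Sketch: echo the tier the server actually used, when it reports one.
resp, err := client.CreateChatCompletion(context.Background(), req)
if err != nil {
	log.Fatal(err)
}
if resp.ServiceTier != "" {
	fmt.Println("served at tier:", resp.ServiceTier)
}
```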