const (
	// authFileName is the name of the credentials file inside the Codex home directory.
	authFileName = "auth.json"

	// codexHomeEnv names the environment variable that relocates the Codex
	// home directory. When it is unset or empty, ~/.codex is used.
	codexHomeEnv = "CODEX_HOME"
)

// Auth represents the persisted local Codex CLI authentication state.
type Auth struct {
	OpenAIAPIKey string `json:"OPENAI_API_KEY"`
	AuthMode     string `json:"auth_mode"`
	Tokens       struct {
		AccessToken  string `json:"access_token"`
		RefreshToken string `json:"refresh_token"`
		IDToken      string `json:"id_token"`
		AccountID    string `json:"account_id"`
	} `json:"tokens"`
}

// Claims contains the subset of JWT claims used by docker-agent.
type Claims struct {
	ChatGPTAccountID string `json:"chatgpt_account_id"`
}

// DefaultPath returns the default location of the local Codex auth file.
//
// A non-empty CODEX_HOME takes precedence; otherwise the file is looked up
// under ".codex" in the user's home directory. An empty string is returned
// when neither location can be determined.
func DefaultPath() string {
	if dir := os.Getenv(codexHomeEnv); dir != "" {
		return filepath.Join(dir, authFileName)
	}

	home, err := os.UserHomeDir()
	if err != nil {
		return ""
	}
	return filepath.Join(home, ".codex", authFileName)
}

// Load reads and decodes the Codex auth file found at DefaultPath.
//
// It returns an error when the path cannot be resolved, the file cannot be
// read, or its content is not valid JSON. The returned *Auth is nil whenever
// the error is non-nil.
func Load() (*Auth, error) {
	path := DefaultPath()
	if path == "" {
		return nil, errors.New("unable to resolve Codex auth path")
	}

	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var auth Auth
	if err := json.Unmarshal(data, &auth); err != nil {
		return nil, err
	}
	return &auth, nil
}

// HasChatGPTAuth reports whether the auth file contains a ChatGPT-backed login:
// auth_mode equals "chatgpt" (case-insensitively) and an access token is present.
// A nil receiver reports false.
func (a *Auth) HasChatGPTAuth() bool {
	if a == nil {
		return false
	}
	return strings.EqualFold(a.AuthMode, "chatgpt") && a.Tokens.AccessToken != ""
}

// AccountID returns the ChatGPT account ID from the auth file or token claims.
//
// The explicit tokens.account_id field wins. Otherwise the ID token and then
// the access token are inspected for a chatgpt_account_id claim. An empty
// string is returned when no source yields an ID, or when the receiver is nil.
func (a *Auth) AccountID() string {
	if a == nil {
		return ""
	}
	if a.Tokens.AccountID != "" {
		return a.Tokens.AccountID
	}

	for _, token := range []string{a.Tokens.IDToken, a.Tokens.AccessToken} {
		// Tokens that are empty or cannot be parsed are skipped, not reported.
		if claims, err := parseClaims(token); err == nil && claims.ChatGPTAccountID != "" {
			return claims.ChatGPTAccountID
		}
	}

	return ""
}

// parseClaims decodes the payload (second dot-separated segment) of a JWT and
// returns the claims stored under the "https://api.openai.com/auth" key.
//
// The signature is NOT verified; the token is only read as a data container.
func parseClaims(jwt string) (*Claims, error) {
	parts := strings.Split(jwt, ".")
	if len(parts) < 2 {
		return nil, errors.New("invalid JWT format")
	}

	// JWT segments are unpadded base64url, but some encoders still emit '='
	// padding, which RawURLEncoding rejects. Strip it so both forms decode.
	segment := strings.TrimRight(parts[1], "=")
	payload, err := base64.RawURLEncoding.DecodeString(segment)
	if err != nil {
		return nil, err
	}

	var raw struct {
		Auth Claims `json:"https://api.openai.com/auth"`
	}
	if err := json.Unmarshal(payload, &raw); err != nil {
		return nil, err
	}
	return &raw.Auth, nil
}
type AutoModelFallbackError struct{} @@ -54,6 +60,7 @@ func (e *AutoModelFallbackError) Error() string { var DefaultModels = map[string]string{ "openai": "gpt-5-mini", + "openai-codex": "gpt-5.4", "anthropic": "claude-sonnet-4-5", "google": "gemini-2.5-flash", "dmr": "ai/qwen3:latest", @@ -62,6 +69,14 @@ var DefaultModels = map[string]string{ } func AvailableProviders(ctx context.Context, modelsGateway string, env environment.Provider) []string { + return availableProviders(ctx, modelsGateway, env, hasCodexChatGPTAuth) +} + +func AutoModelConfig(ctx context.Context, modelsGateway string, env environment.Provider, defaultModel *latest.ModelConfig) latest.ModelConfig { + return autoModelConfig(ctx, modelsGateway, env, defaultModel, hasCodexChatGPTAuth) +} + +func availableProviders(ctx context.Context, modelsGateway string, env environment.Provider, codexAuthFn func() bool) []string { if modelsGateway != "" { // Default to anthropic when using a gateway return []string{"anthropic"} @@ -78,13 +93,17 @@ func AvailableProviders(ctx context.Context, modelsGateway string, env environme } } + if codexAuthFn != nil && codexAuthFn() { + providers = append(providers, "openai-codex") + } + // DMR is always the final fallback providers = append(providers, "dmr") return providers } -func AutoModelConfig(ctx context.Context, modelsGateway string, env environment.Provider, defaultModel *latest.ModelConfig) latest.ModelConfig { +func autoModelConfig(ctx context.Context, modelsGateway string, env environment.Provider, defaultModel *latest.ModelConfig, codexAuthFn func() bool) latest.ModelConfig { // If user specified a default model config, use it (with defaults for unset fields) if defaultModel != nil && defaultModel.Provider != "" && defaultModel.Model != "" { result := *defaultModel @@ -94,7 +113,7 @@ func AutoModelConfig(ctx context.Context, modelsGateway string, env environment. 
return result } - availableProviders := AvailableProviders(ctx, modelsGateway, env) + availableProviders := availableProviders(ctx, modelsGateway, env, codexAuthFn) firstAvailable := availableProviders[0] return latest.ModelConfig{ diff --git a/pkg/model/provider/openai/helpers.go b/pkg/model/provider/openai/helpers.go new file mode 100644 index 000000000..fa0c11663 --- /dev/null +++ b/pkg/model/provider/openai/helpers.go @@ -0,0 +1,18 @@ +package openai + +import ( + "github.com/openai/openai-go/v3/packages/ssestream" + "github.com/openai/openai-go/v3/responses" + + "github.com/docker/docker-agent/pkg/chat" +) + +// ConvertMessagesToResponseInput exposes the shared Responses API message conversion. +func ConvertMessagesToResponseInput(messages []chat.Message) []responses.ResponseInputItemUnionParam { + return convertMessagesToResponseInput(messages) +} + +// NewResponseSSEAdapter exposes the shared Responses API SSE stream adapter. +func NewResponseSSEAdapter(stream *ssestream.Stream[responses.ResponseStreamEventUnion], trackUsage bool) chat.MessageStream { + return newResponseStreamAdapter(stream, trackUsage) +} diff --git a/pkg/model/provider/openaicodex/client.go b/pkg/model/provider/openaicodex/client.go new file mode 100644 index 000000000..8bcdefb2c --- /dev/null +++ b/pkg/model/provider/openaicodex/client.go @@ -0,0 +1,223 @@ +package openaicodex + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "strings" + + oai "github.com/openai/openai-go/v3" + "github.com/openai/openai-go/v3/option" + "github.com/openai/openai-go/v3/packages/param" + "github.com/openai/openai-go/v3/responses" + "github.com/openai/openai-go/v3/shared" + + "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/codexauth" + "github.com/docker/docker-agent/pkg/config/latest" + "github.com/docker/docker-agent/pkg/effort" + "github.com/docker/docker-agent/pkg/environment" + "github.com/docker/docker-agent/pkg/httpclient" + 
"github.com/docker/docker-agent/pkg/model/provider/base" + openairesponses "github.com/docker/docker-agent/pkg/model/provider/openai" + "github.com/docker/docker-agent/pkg/model/provider/options" + "github.com/docker/docker-agent/pkg/tools" +) + +const ( + codexBaseURL = "https://chatgpt.com/backend-api/codex" + codexOriginator = "codex_cli_rs" +) + +type Client struct { + base.Config + clientFn func() (*oai.Client, error) +} + +// NewClient creates a client for the ChatGPT-backed Codex provider. +func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider, opts ...options.Opt) (*Client, error) { + if cfg == nil { + return nil, errors.New("model configuration is required") + } + if env == nil { + return nil, errors.New("environment provider is required") + } + + var globalOptions options.ModelOptions + for _, opt := range opts { + opt(&globalOptions) + } + + clientFn := func() (*oai.Client, error) { + auth, err := codexauth.Load() + if err != nil { + return nil, fmt.Errorf("load Codex auth: %w", err) + } + if !auth.HasChatGPTAuth() { + return nil, errors.New("Codex ChatGPT login not found; run `codex login` first") + } + + accountID := auth.AccountID() + if accountID == "" { + return nil, errors.New("Codex auth is missing chatgpt_account_id") + } + + httpClient := httpclient.NewHTTPClient(ctx, + httpclient.WithHeader("Authorization", "Bearer "+auth.Tokens.AccessToken), + httpclient.WithHeader("chatgpt-account-id", accountID), + httpclient.WithHeader("OpenAI-Beta", "responses=experimental"), + httpclient.WithHeader("originator", codexOriginator), + ) + + client := oai.NewClient( + option.WithAPIKey("chatgpt-oauth"), + option.WithBaseURL(codexBaseURL), + option.WithHTTPClient(httpClient), + ) + return &client, nil + } + + return &Client{ + Config: base.Config{ + ModelConfig: *cfg, + ModelOptions: globalOptions, + Env: env, + }, + clientFn: clientFn, + }, nil +} + +// CreateChatCompletionStream creates a streaming response request for the 
Codex backend. +func (c *Client) CreateChatCompletionStream( + ctx context.Context, + messages []chat.Message, + requestTools []tools.Tool, +) (chat.MessageStream, error) { + if len(messages) == 0 { + return nil, errors.New("at least one message is required") + } + + params := responses.ResponseNewParams{ + Model: c.ModelConfig.Model, + Store: param.NewOpt(false), + Include: []responses.ResponseIncludable{ + "reasoning.encrypted_content", + }, + } + instructions, inputMessages := splitInstructions(messages) + if instructions != "" { + params.Instructions = param.NewOpt(instructions) + } + params.Input.OfInputItemList = openairesponses.ConvertMessagesToResponseInput(inputMessages) + + if c.ModelConfig.Temperature != nil { + params.Temperature = param.NewOpt(*c.ModelConfig.Temperature) + } + if c.ModelConfig.TopP != nil { + params.TopP = param.NewOpt(*c.ModelConfig.TopP) + } + if len(requestTools) > 0 { + toolsParam := make([]responses.ToolUnionParam, len(requestTools)) + for i, tool := range requestTools { + parameters, err := openairesponses.ConvertParametersToSchema(tool.Parameters) + if err != nil { + return nil, err + } + toolsParam[i] = responses.ToolUnionParam{ + OfFunction: &responses.FunctionToolParam{ + Name: tool.Name, + Description: param.NewOpt(tool.Description), + Parameters: parameters, + Strict: param.NewOpt(true), + }, + } + } + params.Tools = toolsParam + if c.ModelConfig.ParallelToolCalls != nil { + params.ParallelToolCalls = param.NewOpt(*c.ModelConfig.ParallelToolCalls) + } + } + + if !c.ModelOptions.NoThinking() { + params.Reasoning = shared.ReasoningParam{ + Summary: shared.ReasoningSummaryAuto, + } + if c.ModelConfig.ThinkingBudget != nil { + effortStr, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget) + if err != nil { + return nil, err + } + params.Reasoning.Effort = shared.ReasoningEffort(effortStr) + } + } + + if structuredOutput := c.ModelOptions.StructuredOutput(); structuredOutput != nil { + params.Text.Format.OfJSONSchema = 
&responses.ResponseFormatTextJSONSchemaConfigParam{ + Name: structuredOutput.Name, + Description: param.NewOpt(structuredOutput.Description), + Schema: structuredOutput.Schema, + Strict: param.NewOpt(structuredOutput.Strict), + } + } + + if requestJSON, err := json.Marshal(params); err == nil { + slog.Debug("OpenAI Codex responses request", "request", string(requestJSON)) + } + + client, err := c.clientFn() + if err != nil { + return nil, err + } + + trackUsage := c.ModelConfig.TrackUsage == nil || *c.ModelConfig.TrackUsage + stream := client.Responses.NewStreaming(ctx, params) + return openairesponses.NewResponseSSEAdapter(stream, trackUsage), nil +} + +// splitInstructions moves system messages into the top-level instructions field. +func splitInstructions(messages []chat.Message) (string, []chat.Message) { + var instructions []string + inputMessages := make([]chat.Message, 0, len(messages)) + + for _, msg := range messages { + if msg.Role != chat.MessageRoleSystem { + inputMessages = append(inputMessages, msg) + continue + } + + text := strings.TrimSpace(msg.Content) + if text == "" && len(msg.MultiContent) > 0 { + var parts []string + for _, part := range msg.MultiContent { + if part.Type == chat.MessagePartTypeText && strings.TrimSpace(part.Text) != "" { + parts = append(parts, strings.TrimSpace(part.Text)) + } + } + text = strings.Join(parts, "\n\n") + } + + if text != "" { + instructions = append(instructions, text) + } + } + + return strings.Join(instructions, "\n\n"), inputMessages +} + +func openAIReasoningEffort(b *latest.ThinkingBudget) (string, error) { + if b == nil { + return string(effort.Medium), nil + } + level, ok := b.EffortLevel() + if !ok { + return "", fmt.Errorf("openai-codex provider expects string reasoning effort, got token budget") + } + switch level { + case effort.None, effort.Minimal, effort.Low, effort.Medium, effort.High, effort.XHigh: + return string(level), nil + default: + return "", fmt.Errorf("invalid openai-codex 
thinking_budget %q", level) + } +} diff --git a/pkg/model/provider/provider.go b/pkg/model/provider/provider.go index 749bb9536..a3116435b 100644 --- a/pkg/model/provider/provider.go +++ b/pkg/model/provider/provider.go @@ -17,6 +17,7 @@ import ( "github.com/docker/docker-agent/pkg/model/provider/dmr" "github.com/docker/docker-agent/pkg/model/provider/gemini" "github.com/docker/docker-agent/pkg/model/provider/openai" + "github.com/docker/docker-agent/pkg/model/provider/openaicodex" "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/model/provider/rulebased" "github.com/docker/docker-agent/pkg/model/provider/vertexai" @@ -35,6 +36,7 @@ type Alias struct { // These are the provider types that have direct implementations (not aliases). var CoreProviders = []string{ "openai", + "openai-codex", "anthropic", "google", "dmr", @@ -240,6 +242,8 @@ func createDirectProvider(ctx context.Context, cfg *latest.ModelConfig, env envi switch providerType { case "openai", "openai_chatcompletions", "openai_responses": return openai.NewClient(ctx, enhancedCfg, env, opts...) + case "openai-codex": + return openaicodex.NewClient(ctx, enhancedCfg, env, opts...) case "anthropic": return anthropic.NewClient(ctx, enhancedCfg, env, opts...) case "google":