From 9add1c348607c14e8fde9966713c97f9a2351919 Mon Sep 17 00:00:00 2001
From: Ivan Timofeev
Date: Fri, 20 Sep 2024 23:40:24 +0300
Subject: [PATCH] add max_completions_tokens for o1 series models (#857)

* add max_completions_tokens for o1 series models

* add validation for o1 series models + beta limitations
---
 chat.go             |  35 +++++---
 chat_stream.go      |   4 +
 chat_stream_test.go |  21 +++++
 chat_test.go        | 211 ++++++++++++++++++++++++++++++++++++++++++++
 completion.go       |  82 +++++++++++++++++
 5 files changed, 341 insertions(+), 12 deletions(-)

diff --git a/chat.go b/chat.go
index dc60f35b9..d47c95e4f 100644
--- a/chat.go
+++ b/chat.go
@@ -200,18 +200,25 @@ type ChatCompletionResponseFormatJSONSchema struct {
 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
-	Model            string                        `json:"model"`
-	Messages         []ChatCompletionMessage       `json:"messages"`
-	MaxTokens        int                           `json:"max_tokens,omitempty"`
-	Temperature      float32                       `json:"temperature,omitempty"`
-	TopP             float32                       `json:"top_p,omitempty"`
-	N                int                           `json:"n,omitempty"`
-	Stream           bool                          `json:"stream,omitempty"`
-	Stop             []string                      `json:"stop,omitempty"`
-	PresencePenalty  float32                       `json:"presence_penalty,omitempty"`
-	ResponseFormat   *ChatCompletionResponseFormat `json:"response_format,omitempty"`
-	Seed             *int                          `json:"seed,omitempty"`
-	FrequencyPenalty float32                       `json:"frequency_penalty,omitempty"`
+	Model    string                  `json:"model"`
+	Messages []ChatCompletionMessage `json:"messages"`
+	// MaxTokens is the maximum number of tokens that can be generated in the chat completion.
+	// This value can be used to control costs for text generated via the API.
+	// It is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
+	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
+	MaxTokens int `json:"max_tokens,omitempty"`
+	// MaxCompletionsTokens is an upper bound for the number of tokens that can be generated for a completion,
+	// including visible output tokens and reasoning tokens: https://platform.openai.com/docs/guides/reasoning
+	MaxCompletionsTokens int `json:"max_completion_tokens,omitempty"`
+	Temperature      float32                       `json:"temperature,omitempty"`
+	TopP             float32                       `json:"top_p,omitempty"`
+	N                int                           `json:"n,omitempty"`
+	Stream           bool                          `json:"stream,omitempty"`
+	Stop             []string                      `json:"stop,omitempty"`
+	PresencePenalty  float32                       `json:"presence_penalty,omitempty"`
+	ResponseFormat   *ChatCompletionResponseFormat `json:"response_format,omitempty"`
+	Seed             *int                          `json:"seed,omitempty"`
+	FrequencyPenalty float32                       `json:"frequency_penalty,omitempty"`
 	// LogitBias must be a token id string (specified by their token ID in the tokenizer), not a word string.
 	// incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
@@ -364,6 +371,10 @@ func (c *Client) CreateChatCompletion(
 		return
 	}
 
+	if err = validateRequestForO1Models(request); err != nil {
+		return
+	}
+
 	req, err := c.newRequest(
 		ctx,
 		http.MethodPost,
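For API consumers, the new field drops into an ordinary chat completion call. Below is a minimal usage sketch, assuming the `github.com/sashabaranov/go-openai` import path and a placeholder API key; the model choice and token bound are illustrative:

```go
package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// Assumption: a real API key; "your-api-key" is a placeholder.
	client := openai.NewClient("your-api-key")

	// For o1 series models, set MaxCompletionsTokens instead of the
	// deprecated MaxTokens. The bound covers visible output tokens as
	// well as hidden reasoning tokens, so leave generous headroom.
	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model:                openai.O1Preview,
		MaxCompletionsTokens: 2048,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Explain quicksort briefly."},
		},
	})
	if err != nil {
		fmt.Printf("ChatCompletion error: %v\n", err)
		return
	}
	fmt.Println(resp.Choices[0].Message.Content)
}
```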
diff --git a/chat_stream.go b/chat_stream.go
index 3f90bc019..f43d01834 100644
--- a/chat_stream.go
+++ b/chat_stream.go
@@ -60,6 +60,10 @@ func (c *Client) CreateChatCompletionStream(
 	}
 	request.Stream = true
 
+	if err = validateRequestForO1Models(request); err != nil {
+		return
+	}
+
 	req, err := c.newRequest(
 		ctx,
 		http.MethodPost,
diff --git a/chat_stream_test.go b/chat_stream_test.go
index 63e45ee23..2e7c99b45 100644
--- a/chat_stream_test.go
+++ b/chat_stream_test.go
@@ -36,6 +36,27 @@ func TestChatCompletionsStreamWrongModel(t *testing.T) {
 	}
 }
 
+func TestChatCompletionsStreamWithO1BetaLimitations(t *testing.T) {
+	config := openai.DefaultConfig("whatever")
+	config.BaseURL = "http://localhost/v1/chat/completions"
+	client := openai.NewClientWithConfig(config)
+	ctx := context.Background()
+
+	req := openai.ChatCompletionRequest{
+		Model: openai.O1Preview,
+		Messages: []openai.ChatCompletionMessage{
+			{
+				Role:    openai.ChatMessageRoleUser,
+				Content: "Hello!",
+			},
+		},
+	}
+	_, err := client.CreateChatCompletionStream(ctx, req)
+	if !errors.Is(err, openai.ErrO1BetaLimitationsStreaming) {
+		t.Fatalf("CreateChatCompletionStream should return ErrO1BetaLimitationsStreaming, but returned: %v", err)
+	}
+}
+
 func TestCreateChatCompletionStream(t *testing.T) {
 	client, server, teardown := setupOpenAITestServer()
 	defer teardown()
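Since the validation runs before any HTTP request is built, a caller that streams by default can detect the o1 limitation cheaply and retry without streaming. A sketch under the same import assumption; the fallback strategy is one possible design, not something this patch prescribes:

```go
package main

import (
	"context"
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient("your-api-key") // placeholder key
	req := openai.ChatCompletionRequest{
		Model:                openai.O1Mini,
		MaxCompletionsTokens: 1000,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Hello!"},
		},
	}

	// CreateChatCompletionStream sets request.Stream = true internally,
	// so the validator rejects o1 models here without any network I/O.
	stream, err := client.CreateChatCompletionStream(context.Background(), req)
	if errors.Is(err, openai.ErrO1BetaLimitationsStreaming) {
		// Fall back to a blocking completion for o1 models.
		resp, err := client.CreateChatCompletion(context.Background(), req)
		if err != nil {
			fmt.Printf("fallback completion error: %v\n", err)
			return
		}
		fmt.Println(resp.Choices[0].Message.Content)
		return
	}
	if err != nil {
		fmt.Printf("stream error: %v\n", err)
		return
	}
	defer stream.Close()
	// ... consume the stream with stream.Recv() for non-o1 models.
}
```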
diff --git a/chat_test.go b/chat_test.go
index 37dc09d4d..a54dd35e0 100644
--- a/chat_test.go
+++ b/chat_test.go
@@ -52,6 +52,199 @@ func TestChatCompletionsWrongModel(t *testing.T) {
 	checks.ErrorIs(t, err, openai.ErrChatCompletionInvalidModel, msg)
 }
 
+func TestO1ModelsChatCompletionsDeprecatedFields(t *testing.T) {
+	tests := []struct {
+		name          string
+		in            openai.ChatCompletionRequest
+		expectedError error
+	}{
+		{
+			name: "o1-preview_MaxTokens_deprecated",
+			in: openai.ChatCompletionRequest{
+				MaxTokens: 5,
+				Model:     openai.O1Preview,
+			},
+			expectedError: openai.ErrO1MaxTokensDeprecated,
+		},
+		{
+			name: "o1-mini_MaxTokens_deprecated",
+			in: openai.ChatCompletionRequest{
+				MaxTokens: 5,
+				Model:     openai.O1Mini,
+			},
+			expectedError: openai.ErrO1MaxTokensDeprecated,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := openai.DefaultConfig("whatever")
+			config.BaseURL = "http://localhost/v1"
+			client := openai.NewClientWithConfig(config)
+			ctx := context.Background()
+
+			_, err := client.CreateChatCompletion(ctx, tt.in)
+			checks.HasError(t, err)
+			msg := fmt.Sprintf("CreateChatCompletion should return deprecated MaxTokens error, returned: %s", err)
+			checks.ErrorIs(t, err, tt.expectedError, msg)
+		})
+	}
+}
+
+func TestO1ModelsChatCompletionsBetaLimitations(t *testing.T) {
+	tests := []struct {
+		name          string
+		in            openai.ChatCompletionRequest
+		expectedError error
+	}{
+		{
+			name: "log_probs_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				LogProbs:             true,
+				Model:                openai.O1Preview,
+			},
+			expectedError: openai.ErrO1BetaLimitationsLogprobs,
+		},
+		{
+			name: "message_type_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleSystem,
+					},
+				},
+			},
+			expectedError: openai.ErrO1BetaLimitationsMessageTypes,
+		},
+		{
+			name: "tool_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Tools: []openai.Tool{
+					{
+						Type: openai.ToolTypeFunction,
+					},
+				},
+			},
+			expectedError: openai.ErrO1BetaLimitationsTools,
+		},
+		{
+			name: "set_temperature_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(2),
+			},
+			expectedError: openai.ErrO1BetaLimitationsOther,
+		},
+		{
+			name: "set_top_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(1),
+				TopP:        float32(0.1),
+			},
+			expectedError: openai.ErrO1BetaLimitationsOther,
+		},
+		{
+			name: "set_n_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(1),
+				TopP:        float32(1),
+				N:           2,
+			},
+			expectedError: openai.ErrO1BetaLimitationsOther,
+		},
+		{
+			name: "set_presence_penalty_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				PresencePenalty: float32(1),
+			},
+			expectedError: openai.ErrO1BetaLimitationsOther,
+		},
+		{
+			name: "set_frequency_penalty_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionsTokens: 1000,
+				Model:                openai.O1Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				FrequencyPenalty: float32(0.1),
+			},
+			expectedError: openai.ErrO1BetaLimitationsOther,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := openai.DefaultConfig("whatever")
+			config.BaseURL = "http://localhost/v1"
+			client := openai.NewClientWithConfig(config)
+			ctx := context.Background()
+
+			_, err := client.CreateChatCompletion(ctx, tt.in)
+			checks.HasError(t, err)
+			msg := fmt.Sprintf("CreateChatCompletion should return beta limitation error, returned: %s", err)
+			checks.ErrorIs(t, err, tt.expectedError, msg)
+		})
+	}
+}
+
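The table above exercises the rejection paths; the shape that passes is narrower than it looks, since values exactly equal to the fixed beta defaults (temperature, top_p and n at 1, penalties at 0, or simply left unset) clear the validator. A sketch; `newO1Request` is a hypothetical helper, not part of the library:

```go
package main

import (
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

// newO1Request builds a request that satisfies every o1 beta check:
// explicit values equal to the fixed defaults pass, anything else fails.
func newO1Request(prompt string) openai.ChatCompletionRequest {
	return openai.ChatCompletionRequest{
		Model:                openai.O1Mini,
		MaxCompletionsTokens: 1000,
		Temperature:          1, // accepted: equals the fixed beta value
		TopP:                 1, // accepted: equals the fixed beta value
		N:                    1, // accepted: equals the fixed beta value
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: prompt},
		},
	}
}

func main() {
	fmt.Printf("%+v\n", newO1Request("Hello!"))
}
```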
 func TestChatRequestOmitEmpty(t *testing.T) {
 	data, err := json.Marshal(openai.ChatCompletionRequest{
 		// We set model b/c it's required, so omitempty doesn't make sense
@@ -97,6 +290,24 @@ func TestChatCompletions(t *testing.T) {
 	checks.NoError(t, err, "CreateChatCompletion error")
 }
 
+// TestO1ModelChatCompletions Tests the chat completions endpoint with an o1 series model using the mocked server.
+func TestO1ModelChatCompletions(t *testing.T) {
+	client, server, teardown := setupOpenAITestServer()
+	defer teardown()
+	server.RegisterHandler("/v1/chat/completions", handleChatCompletionEndpoint)
+	_, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
+		Model:                openai.O1Preview,
+		MaxCompletionsTokens: 1000,
+		Messages: []openai.ChatCompletionMessage{
+			{
+				Role:    openai.ChatMessageRoleUser,
+				Content: "Hello!",
+			},
+		},
+	})
+	checks.NoError(t, err, "CreateChatCompletion error")
+}
+
 // TestCompletions Tests the completions endpoint of the API using the mocked server.
 func TestChatCompletionsWithHeaders(t *testing.T) {
 	client, server, teardown := setupOpenAITestServer()
diff --git a/completion.go b/completion.go
index e1e065a8b..8e3172ace 100644
--- a/completion.go
+++ b/completion.go
@@ -7,11 +7,20 @@ import (
 )
 
 var (
+	ErrO1MaxTokensDeprecated                   = errors.New("this model does not support MaxTokens, please use MaxCompletionsTokens")                             //nolint:lll
 	ErrCompletionUnsupportedModel              = errors.New("this model is not supported with this method, please use CreateChatCompletion client method instead") //nolint:lll
 	ErrCompletionStreamNotSupported            = errors.New("streaming is not supported with this method, please use CreateCompletionStream")                      //nolint:lll
 	ErrCompletionRequestPromptTypeNotSupported = errors.New("the type of CompletionRequest.Prompt only supports string and []string")                              //nolint:lll
 )
 
+var (
+	ErrO1BetaLimitationsMessageTypes = errors.New("this model has beta-limitations, user and assistant messages only, system messages are not supported")                                  //nolint:lll
+	ErrO1BetaLimitationsStreaming    = errors.New("this model has beta-limitations, streaming not supported")                                                                              //nolint:lll
+	ErrO1BetaLimitationsTools        = errors.New("this model has beta-limitations, tools, function calling, and response format parameters are not supported")                            //nolint:lll
+	ErrO1BetaLimitationsLogprobs     = errors.New("this model has beta-limitations, logprobs not supported")                                                                               //nolint:lll
+	ErrO1BetaLimitationsOther        = errors.New("this model has beta-limitations, temperature, top_p and n are fixed at 1, while presence_penalty and frequency_penalty are fixed at 0") //nolint:lll
+)
+
 // GPT3 Defines the models provided by OpenAI to use when generating
 // completions from OpenAI.
 // GPT3 Models are designed for text-based tasks. For code-specific
@@ -85,6 +94,15 @@ const (
 	CodexCodeDavinci001 = "code-davinci-001"
 )
 
+// O1SeriesModels lists the o1 series of OpenAI reasoning models.
+// Some legacy API attributes are not supported for these models.
+var O1SeriesModels = map[string]struct{}{
+	O1Mini:            {},
+	O1Mini20240912:    {},
+	O1Preview:         {},
+	O1Preview20240912: {},
+}
+
 var disabledModelsForEndpoints = map[string]map[string]bool{
 	"/completions": {
 		O1Mini: true,
@@ -146,6 +164,70 @@ func checkPromptType(prompt any) bool {
 	return isString || isStringSlice
 }
 
+var unsupportedToolsForO1Models = map[ToolType]struct{}{
+	ToolTypeFunction: {},
+}
+
+var availableMessageRoleForO1Models = map[string]struct{}{
+	ChatMessageRoleUser:      {},
+	ChatMessageRoleAssistant: {},
+}
+
+// validateRequestForO1Models checks o1 series model requests for the deprecated MaxTokens field and for beta limitations.
+func validateRequestForO1Models(request ChatCompletionRequest) error {
+	if _, found := O1SeriesModels[request.Model]; !found {
+		return nil
+	}
+
+	if request.MaxTokens > 0 {
+		return ErrO1MaxTokensDeprecated
+	}
+
+	// Beta Limitations
+	// refs: https://platform.openai.com/docs/guides/reasoning/beta-limitations
+	// Streaming: not supported.
+	if request.Stream {
+		return ErrO1BetaLimitationsStreaming
+	}
+	// Logprobs: not supported.
+	if request.LogProbs {
+		return ErrO1BetaLimitationsLogprobs
+	}
+
+	// Message types: user and assistant messages only; system messages are not supported.
+	for _, m := range request.Messages {
+		if _, found := availableMessageRoleForO1Models[m.Role]; !found {
+			return ErrO1BetaLimitationsMessageTypes
+		}
+	}
+
+	// Tools: tools, function calling, and response format parameters are not supported.
+	for _, t := range request.Tools {
+		if _, found := unsupportedToolsForO1Models[t.Type]; found {
+			return ErrO1BetaLimitationsTools
+		}
+	}
+
+	// Other: temperature, top_p and n are fixed at 1, while presence_penalty and frequency_penalty are fixed at 0.
+	if request.Temperature > 0 && request.Temperature != 1 {
+		return ErrO1BetaLimitationsOther
+	}
+	if request.TopP > 0 && request.TopP != 1 {
+		return ErrO1BetaLimitationsOther
+	}
+	if request.N > 0 && request.N != 1 {
+		return ErrO1BetaLimitationsOther
+	}
+	if request.PresencePenalty > 0 {
+		return ErrO1BetaLimitationsOther
+	}
+	if request.FrequencyPenalty > 0 {
+		return ErrO1BetaLimitationsOther
+	}
+
+	return nil
+}
+
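Because the validator returns the sentinel errors directly, callers can branch on them with `errors.Is` before anything reaches the wire. A sketch; `describeO1Error` is a hypothetical helper and the hint strings are illustrative:

```go
package main

import (
	"context"
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

// describeO1Error maps the o1 validation sentinels to remediation hints.
func describeO1Error(err error) string {
	switch {
	case errors.Is(err, openai.ErrO1MaxTokensDeprecated):
		return "set MaxCompletionsTokens instead of MaxTokens"
	case errors.Is(err, openai.ErrO1BetaLimitationsMessageTypes):
		return "use only user and assistant messages; drop system messages"
	case errors.Is(err, openai.ErrO1BetaLimitationsTools):
		return "remove tools and function-calling parameters"
	case errors.Is(err, openai.ErrO1BetaLimitationsLogprobs):
		return "disable LogProbs"
	case errors.Is(err, openai.ErrO1BetaLimitationsOther):
		return "leave temperature, top_p and n at 1 and penalties at 0"
	default:
		return err.Error()
	}
}

func main() {
	client := openai.NewClient("your-api-key") // placeholder key
	_, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model:     openai.O1Mini,
		MaxTokens: 100, // deprecated for o1 models: rejected before any HTTP call
	})
	if err != nil {
		fmt.Println(describeO1Error(err))
	}
}
```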
 // CompletionRequest represents a request structure for completion API.
 type CompletionRequest struct {
 	Model string `json:"model"`