From f8e28c371902516892ecf3d151ea3e9cda2f122e Mon Sep 17 00:00:00 2001 From: wozulong <> Date: Sat, 18 May 2024 22:46:48 +0800 Subject: [PATCH] update max_tokens Signed-off-by: wozulong <> --- README.md | 12 ++++++++--- config.json.example | 1 - go.mod | 3 --- go.sum | 6 ------ main.go | 49 ++++----------------------------------------- 5 files changed, 13 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 2202233..c9e6267 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,6 @@ "codex_api_key": "sk-xxx", "codex_api_organization": "", "codex_api_project": "", - "codex_max_tokens": 4093, "chat_api_base": "https://api-proxy.oaipro.com/v1", "chat_api_key": "sk-xxx", "chat_api_organization": "", @@ -49,12 +48,19 @@ `chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的,如果不存在映射,则使用 `chat_model_default` 。 -`codex_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。 - `chat_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096` 可以通过 `OVERRIDE_` + 大写配置项作为环境变量,可以覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx` +### 重要说明 +`codex_max_tokens` 工作并不完美,已经移除。**JetBrains IDE 完美工作**,`VSCode` 需要执行以下脚本Patch之: + +* macOS `sed -i '' -E 's/\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)/.maxPromptCompletionTokens(\1,2048)/' ~/.vscode/extensions/github.copilot-*/dist/extension.js` +* Linux `sed -i -E 's/\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)/.maxPromptCompletionTokens(\1,2048)/' ~/.vscode/extensions/github.copilot-*/dist/extension.js` +* Windows 不知道怎么写,期待大佬PR。 +* 因为是Patch,所以:**Copilot每次升级都要执行一次**。 +* 具体原因是客户端需要根据 `max_tokens` 精密计算prompt,后台删减会有问题。 + ### 其他说明 1. 理论上,Chat 部分可以使用 `chat2api` ,而 Codex 代码生成部分则不太适合使用 `chat2api` 。 2. 
代码生成部分做过延时生成和客户端 Cancel 处理,很有效节省你的Token。 diff --git a/config.json.example b/config.json.example index 6c2a5d2..1712968 100644 --- a/config.json.example +++ b/config.json.example @@ -6,7 +6,6 @@ "codex_api_key": "sk-xxx", "codex_api_organization": "", "codex_api_project": "", - "codex_max_tokens": 2048, "chat_api_base": "https://api-proxy.oaipro.com/v1", "chat_api_key": "sk-xxx", "chat_api_organization": "", diff --git a/go.mod b/go.mod index 34c8f1d..5ef356a 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ toolchain go1.21.4 require ( github.com/gin-gonic/gin v1.10.0 - github.com/linux-do/tiktoken-go v0.7.0 github.com/tidwall/gjson v1.17.1 github.com/tidwall/sjson v1.2.5 golang.org/x/net v0.25.0 @@ -17,7 +16,6 @@ require ( github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect - github.com/dlclark/regexp2 v1.11.0 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-playground/locales v0.14.1 // indirect @@ -25,7 +23,6 @@ require ( github.com/go-playground/validator/v10 v10.20.0 // indirect github.com/goccy/go-json v0.10.2 // indirect github.com/google/go-cmp v0.5.9 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/kr/pretty v0.3.0 // indirect diff --git a/go.sum b/go.sum index e05fea7..ebce207 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= -github.com/dlclark/regexp2 
v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= @@ -31,8 +29,6 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -49,8 +45,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= -github.com/linux-do/tiktoken-go v0.7.0 h1:Kcm/miJ5gp77srtF8GQWnfq7W9kTaXEuHZg/g9IVEu8= -github.com/linux-do/tiktoken-go v0.7.0/go.mod h1:9Vkdtp0ngi4USmrdSx984iuIQ5IMr0hnUdz4jZZTJb8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= diff --git a/main.go b/main.go index 4f55082..f77175f 100644 
--- a/main.go +++ b/main.go @@ -6,7 +6,6 @@ import ( "encoding/json" "errors" "github.com/gin-gonic/gin" - "github.com/linux-do/tiktoken-go" "github.com/tidwall/gjson" "github.com/tidwall/sjson" "golang.org/x/net/http2" @@ -31,7 +30,6 @@ type config struct { CodexApiKey string `json:"codex_api_key"` CodexApiOrganization string `json:"codex_api_organization"` CodexApiProject string `json:"codex_api_project"` - CodexMaxTokens int `json:"codex_max_tokens"` ChatApiBase string `json:"chat_api_base"` ChatApiKey string `json:"chat_api_key"` ChatApiOrganization string `json:"chat_api_organization"` @@ -136,9 +134,8 @@ func closeIO(c io.Closer) { } type ProxyService struct { - cfg *config - client *http.Client - tokenizer *tiktoken.Tiktoken + cfg *config + client *http.Client } func NewProxyService(cfg *config) (*ProxyService, error) { @@ -147,15 +144,9 @@ func NewProxyService(cfg *config) (*ProxyService, error) { return nil, err } - tokenizer, err := tiktoken.EncodingForModel(InstructModel) - if nil != err { - return nil, err - } - return &ProxyService{ - cfg: cfg, - client: client, - tokenizer: tokenizer, + cfg: cfg, + client: client, }, nil } @@ -234,14 +225,6 @@ func (s *ProxyService) completions(c *gin.Context) { _, _ = io.Copy(c.Writer, resp.Body) } -func (s *ProxyService) countToken(token string) int { - if "" == token { - return 0 - } - - return len(s.tokenizer.Encode(token, nil, nil)) -} - func (s *ProxyService) codeCompletions(c *gin.Context) { ctx := c.Request.Context() @@ -257,30 +240,6 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { return } - prompt := gjson.GetBytes(body, "prompt").String() - suffix := gjson.GetBytes(body, "suffix").String() - inputTokens := s.countToken(prompt) - suffixTokens := s.countToken(suffix) - outputTokens := int(gjson.GetBytes(body, "max_tokens").Int()) - - totalTokens := inputTokens + suffixTokens + outputTokens - if totalTokens > s.cfg.CodexMaxTokens { // reduce - left, right := 0, len(prompt) - for left < right { - 
mid := (left + right) / 2 - subPrompt := prompt[mid:] - subInputTokens := s.countToken(subPrompt) - totalTokens = subInputTokens + suffixTokens + outputTokens - if totalTokens > s.cfg.CodexMaxTokens { - left = mid + 1 - } else { - right = mid - } - } - - body, _ = sjson.SetBytes(body, "prompt", prompt[left:]) - } - body, _ = sjson.DeleteBytes(body, "extra") body, _ = sjson.DeleteBytes(body, "nwo") body, _ = sjson.SetBytes(body, "model", InstructModel)