From f263ead51d460df69d8afd8ae3da60c56a30b8dd Mon Sep 17 00:00:00 2001 From: Havoc412 <2993167370@qq.com> Date: Wed, 20 Nov 2024 09:01:53 +0800 Subject: [PATCH] =?UTF-8?q?refactor(llm=5Ffactory):=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=20GLM=20=E5=AE=A2=E6=88=B7=E7=AB=AF=E6=9C=AA=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E8=B5=84=E6=BA=90=E9=87=8A=E6=94=BE=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 NlpController 和 RagController 中添加了释放 GLM 客户端资源的逻辑 - 在 GlmClientHub 中增加了 UnavtiveOneGlmClient 方法,用于将客户端标记为未使用状态 - 优化了资源管理,提高了 GLM 客户端的利用率 --- app/http/controller/web/nlp_controller.go | 1 + app/http/controller/web/rag_controller.go | 2 ++ app/utils/llm_factory/glm_client.go | 22 +++++++++++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/app/http/controller/web/nlp_controller.go b/app/http/controller/web/nlp_controller.go index 4e5ee3c..0ba30cb 100644 --- a/app/http/controller/web/nlp_controller.go +++ b/app/http/controller/web/nlp_controller.go @@ -22,6 +22,7 @@ func (n *Nlp) Title(context *gin.Context) { if ercode > 0 { response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode]) } + defer variable.GlmClientHub.UnavtiveOneGlmClient(tempGlmKey) defer variable.GlmClientHub.ReleaseOneGlmClient(tempGlmKey) // 临时使用,用完就释放。 newTitle := nlp.GenerateTitle(content, client) diff --git a/app/http/controller/web/rag_controller.go b/app/http/controller/web/rag_controller.go index 4c75314..5dad542 100644 --- a/app/http/controller/web/rag_controller.go +++ b/app/http/controller/web/rag_controller.go @@ -87,6 +87,7 @@ func (r *Rag) ChatSSE(context *gin.Context) { response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode]) return } + defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时,取消 Avtive 的占用。 // 1. query embedding embedding, ok := nlp.GetEmbedding([]string{query}) @@ -176,6 +177,7 @@ func (r *Rag) ChatWebSocket(context *gin.Context) { } return } + defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时,取消 Avtive 的占用。 // 1. query embedding clientInfo.AddQuery(query) diff --git a/app/utils/llm_factory/glm_client.go b/app/utils/llm_factory/glm_client.go index ba9eac2..7565aae 100644 --- a/app/utils/llm_factory/glm_client.go +++ b/app/utils/llm_factory/glm_client.go @@ -11,12 +11,12 @@ import ( type GlmClientHub struct { Idle int // 最大连接数 - Active int + Active int // 最大活跃数 ApiKey string DefaultModelName string InitPrompt string Clients map[string]*ClientInfo - LifeTime time.Duration + LifeTime time.Duration // 最长待机周期 } type ClientInfo struct { @@ -58,8 +58,9 @@ func (g *GlmClientHub) GetOneGlmClientInfo(token string, mode int) (clientInfo * } // 空闲数检查 - if g.Idle > 0 { + if g.Idle > 0 && g.Active > 0 { g.Idle -= 1 + g.Active -= 1 } else { code = errcode.ErrGlmBusy return @@ -121,6 +122,21 @@ func (g *GlmClientHub) cleanupClients() { } } +/** + * @description: ws 服务完毕,进入待机状态。 + * @param {string} token + * @return {*} + * @Tip 对于临时使用的小功能,需要依次 defer 下面两个函数。 + */ +func (g *GlmClientHub) UnavtiveOneGlmClient(token string) bool { + if clientInfo, exists := g.Clients[token]; exists { + g.Active -= 1 + clientInfo.LastUsed = time.Now() + return true + } + return false +} + /** * @description: 显式地释放资源。 * @param {string} token