diff --git a/app/http/controller/web/nlp_controller.go b/app/http/controller/web/nlp_controller.go index 4e5ee3c..0ba30cb 100644 --- a/app/http/controller/web/nlp_controller.go +++ b/app/http/controller/web/nlp_controller.go @@ -22,6 +22,7 @@ func (n *Nlp) Title(context *gin.Context) { if ercode > 0 { response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode]) } + defer variable.GlmClientHub.UnavtiveOneGlmClient(tempGlmKey) defer variable.GlmClientHub.ReleaseOneGlmClient(tempGlmKey) // 临时使用,用完就释放。 newTitle := nlp.GenerateTitle(content, client) diff --git a/app/http/controller/web/rag_controller.go b/app/http/controller/web/rag_controller.go index 4c75314..5dad542 100644 --- a/app/http/controller/web/rag_controller.go +++ b/app/http/controller/web/rag_controller.go @@ -87,6 +87,7 @@ func (r *Rag) ChatSSE(context *gin.Context) { response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode]) return } + defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时,取消 Avtive 的占用。 // 1. query embedding embedding, ok := nlp.GetEmbedding([]string{query}) @@ -176,6 +177,7 @@ func (r *Rag) ChatWebSocket(context *gin.Context) { } return } + defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时,取消 Avtive 的占用。 // 1. 
query embedding clientInfo.AddQuery(query) diff --git a/app/utils/llm_factory/glm_client.go b/app/utils/llm_factory/glm_client.go index ba9eac2..7565aae 100644 --- a/app/utils/llm_factory/glm_client.go +++ b/app/utils/llm_factory/glm_client.go @@ -11,12 +11,12 @@ import ( type GlmClientHub struct { Idle int // 最大连接数 - Active int + Active int // 最大活跃数 ApiKey string DefaultModelName string InitPrompt string Clients map[string]*ClientInfo - LifeTime time.Duration + LifeTime time.Duration // 最长待机周期 } type ClientInfo struct { @@ -58,8 +58,9 @@ func (g *GlmClientHub) GetOneGlmClientInfo(token string, mode int) (clientInfo * } // 空闲数检查 - if g.Idle > 0 { + if g.Idle > 0 && g.Active > 0 { g.Idle -= 1 + g.Active -= 1 } else { code = errcode.ErrGlmBusy return @@ -121,6 +122,21 @@ func (g *GlmClientHub) cleanupClients() { } } +/** + * @description: Called when a ws session has finished: puts the client into standby by giving back the Active slot reserved in GetOneGlmClientInfo and refreshing LastUsed. + * @param {string} token key of the client in g.Clients + * @return {bool} true when the token was found and updated, false otherwise + * @Tip For short-lived, one-off uses, defer this function and the release function below it, in that order. + */ +func (g *GlmClientHub) UnavtiveOneGlmClient(token string) bool { + if clientInfo, exists := g.Clients[token]; exists { + g.Active += 1 // hand the slot back; decrementing again here would drain the budget until every request gets ErrGlmBusy + clientInfo.LastUsed = time.Now() + return true + } + return false +} + /** * @description: 显式地释放资源。 * @param {string} token