refactor(llm_factory): 新增 GLM 客户端未使用资源释放功能

- 在 NlpController 和 RagController 中添加了释放 GLM 客户端资源的逻辑
- 在 GlmClientHub 中增加了 UnavtiveOneGlmClient 方法，用于将客户端标记为未使用状态
- 优化了资源管理，提高了 GLM 客户端的利用率
2024-11-20 09:01:53 +08:00 · 2024-11-20 09:01:53 +08:00 · f263ead51d
commit f263ead51d
parent ea102eef60
3 changed files with 22 additions and 3 deletions
--- a/app/http/controller/web/nlp_controller.go
+++ b/app/http/controller/web/nlp_controller.go
@ -22,6 +22,7 @@ func (n *Nlp) Title(context *gin.Context) {
 	if ercode > 0 {
 		response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode])
 	}
 	defer variable.GlmClientHub.UnavtiveOneGlmClient(tempGlmKey)
 	defer variable.GlmClientHub.ReleaseOneGlmClient(tempGlmKey) // 临时使用，用完就释放。
 	newTitle := nlp.GenerateTitle(content, client)
--- a/app/http/controller/web/rag_controller.go
+++ b/app/http/controller/web/rag_controller.go
@ -87,6 +87,7 @@ func (r *Rag) ChatSSE(context *gin.Context) {
 		response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode])
 		return
 	}
 	defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时，取消 Avtive 的占用。
 	// 1. query embedding
 	embedding, ok := nlp.GetEmbedding([]string{query})
@ -176,6 +177,7 @@ func (r *Rag) ChatWebSocket(context *gin.Context) {
 		}
 		return
 	}
 	defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时，取消 Avtive 的占用。
 	// 1. query embedding
 	clientInfo.AddQuery(query)
--- a/app/utils/llm_factory/glm_client.go
+++ b/app/utils/llm_factory/glm_client.go
@ -11,12 +11,12 @@ import (
 type GlmClientHub struct {
 	Idle             int // 最大连接数
-	Active           int
+	Active           int // 最大活跃数
 	ApiKey           string
 	DefaultModelName string
 	InitPrompt       string
 	Clients          map[string]*ClientInfo
-	LifeTime         time.Duration
+	LifeTime         time.Duration // 最长待机周期
 }
 type ClientInfo struct {
@ -58,8 +58,9 @@ func (g *GlmClientHub) GetOneGlmClientInfo(token string, mode int) (clientInfo *
 	}
 	// 空闲数检查
-	if g.Idle > 0 {
+	if g.Idle > 0 && g.Active > 0 {
 		g.Idle -= 1
 		g.Active -= 1
 	} else {
 		code = errcode.ErrGlmBusy
 		return
@ -121,6 +122,21 @@ func (g *GlmClientHub) cleanupClients() {
 	}
 }
 /**
 * @description: Marks the client registered under token as no longer active
 *               (e.g. when a websocket/SSE session ends) and refreshes its
 *               LastUsed timestamp so the standby period starts from now.
 * @param {string} token key of the client in g.Clients
 * @return {bool} true if the token was found and updated, false otherwise
 * @Tip Short-lived callers are expected to defer both this method and the
 *      explicit release function.
 *      NOTE(review): Go runs deferred calls last-in-first-out; if the release
 *      function removes the token from g.Clients, it must run AFTER this
 *      method or the lookup here will miss — confirm the defer order at the
 *      call sites.
 */
 func (g *GlmClientHub) UnavtiveOneGlmClient(token string) bool {
 	clientInfo, exists := g.Clients[token]
 	if !exists {
 		return false
 	}
 	// Active is a remaining-capacity counter: acquiring a client decrements
 	// it, so handing the slot back must increment it. Decrementing here would
 	// leak one slot per call until every request is rejected as busy.
 	g.Active++
 	clientInfo.LastUsed = time.Now()
 	return true
 }
 /**
 * @description: 显式地释放资源。
 * @param {string} token