From f263ead51d460df69d8afd8ae3da60c56a30b8dd Mon Sep 17 00:00:00 2001
From: Havoc412 <2993167370@qq.com>
Date: Wed, 20 Nov 2024 09:01:53 +0800
Subject: [PATCH] =?UTF-8?q?refactor(llm=5Ffactory):=20=E6=96=B0=E5=A2=9E?=
 =?UTF-8?q?=20GLM=20=E5=AE=A2=E6=88=B7=E7=AB=AF=E6=9C=AA=E4=BD=BF=E7=94=A8?=
 =?UTF-8?q?=E8=B5=84=E6=BA=90=E9=87=8A=E6=94=BE=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 在 NlpController 和 RagController 中添加了释放 GLM 客户端资源的逻辑
- 在 GlmClientHub 中增加了 UnavtiveOneGlmClient 方法，用于将客户端标记为未使用状态
- 优化了资源管理，提高了 GLM 客户端的利用率
---
 app/http/controller/web/nlp_controller.go |  1 +
 app/http/controller/web/rag_controller.go |  2 ++
 app/utils/llm_factory/glm_client.go       | 22 +++++++++++++++++++---
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/app/http/controller/web/nlp_controller.go b/app/http/controller/web/nlp_controller.go
index 4e5ee3c..0ba30cb 100644
--- a/app/http/controller/web/nlp_controller.go
+++ b/app/http/controller/web/nlp_controller.go
@@ -22,6 +22,7 @@ func (n *Nlp) Title(context *gin.Context) {
 	if ercode > 0 {
 		response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode])
 	}
+	defer variable.GlmClientHub.UnavtiveOneGlmClient(tempGlmKey)
 	defer variable.GlmClientHub.ReleaseOneGlmClient(tempGlmKey) // 临时使用，用完就释放。
 
 	newTitle := nlp.GenerateTitle(content, client)
diff --git a/app/http/controller/web/rag_controller.go b/app/http/controller/web/rag_controller.go
index 4c75314..5dad542 100644
--- a/app/http/controller/web/rag_controller.go
+++ b/app/http/controller/web/rag_controller.go
@@ -87,6 +87,7 @@ func (r *Rag) ChatSSE(context *gin.Context) {
 		response.Fail(context, ercode, errcode.ErrMsg[ercode], errcode.ErrMsgForUser[ercode])
 		return
 	}
+	defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时，取消 Avtive 的占用。
 
 	// 1. query embedding
 	embedding, ok := nlp.GetEmbedding([]string{query})
@@ -176,6 +177,7 @@ func (r *Rag) ChatWebSocket(context *gin.Context) {
 		}
 		return
 	}
+	defer variable.GlmClientHub.UnavtiveOneGlmClient(token) // INFO ws 结束时，取消 Avtive 的占用。
 
 	// 1. query embedding
 	clientInfo.AddQuery(query)
diff --git a/app/utils/llm_factory/glm_client.go b/app/utils/llm_factory/glm_client.go
index ba9eac2..7565aae 100644
--- a/app/utils/llm_factory/glm_client.go
+++ b/app/utils/llm_factory/glm_client.go
@@ -11,12 +11,12 @@ import (
 
 type GlmClientHub struct {
 	Idle             int // 最大连接数
-	Active           int
+	Active           int // active-client quota; decremented each time a client is acquired
 	ApiKey           string
 	DefaultModelName string
 	InitPrompt       string
 	Clients          map[string]*ClientInfo
-	LifeTime         time.Duration
+	LifeTime         time.Duration // maximum standby (idle) period
 }
 
 type ClientInfo struct {
@@ -58,8 +58,9 @@ func (g *GlmClientHub) GetOneGlmClientInfo(token string, mode int) (clientInfo *
 	}
 
 	// 空闲数检查
-	if g.Idle > 0 {
+	if g.Idle > 0 && g.Active > 0 {
 		g.Idle -= 1
+		g.Active -= 1
 	} else {
 		code = errcode.ErrGlmBusy
 		return
@@ -121,6 +122,21 @@ func (g *GlmClientHub) cleanupClients() {
 	}
 }
 
+/**
+ * @description: Marks the client stored under token as standby once its ws service has finished.
+ * @param {string} token key of the client in g.Clients
+ * @return {bool} true if the client exists (slot returned, LastUsed refreshed), false otherwise
+ * @Tip One-off callers defer both this and the release function below; defers run LIFO, so verify the order.
+ */
+func (g *GlmClientHub) UnavtiveOneGlmClient(token string) bool {
+	if clientInfo, exists := g.Clients[token]; exists {
+		g.Active += 1 // give back the slot taken by GetOneGlmClientInfo (which does g.Active -= 1)
+		clientInfo.LastUsed = time.Now()
+		return true
+	}
+	return false
+}
+
 /**
  * @description: 显式地释放资源。
  * @param {string} token