diff --git a/app/http/controller/web/rag_controller.go b/app/http/controller/web/rag_controller.go index 7b51562..4c75314 100644 --- a/app/http/controller/web/rag_controller.go +++ b/app/http/controller/web/rag_controller.go @@ -1,6 +1,7 @@ package web import ( + "catface/app/global/consts" "catface/app/global/errcode" "catface/app/global/variable" "catface/app/model" @@ -12,6 +13,7 @@ import ( "catface/app/utils/response" "io" "net/http" + "strconv" "github.com/gin-gonic/gin" "github.com/gorilla/websocket" @@ -21,6 +23,20 @@ import ( type Rag struct { } +func (r *Rag) Release(context *gin.Context) { + token := context.GetString(consts.ValidatorPrefix + "token") + if ok := variable.GlmClientHub.ReleaseOneGlmClient(token); ok { + variable.ZapLog.Info("释放一个 GLM Client", + zap.String("token", token), + zap.String("当前空闲连接数", strconv.Itoa(variable.GlmClientHub.Idle))) + } else { + variable.ZapLog.Warn("尝试释放一个 GLM Client,但是 token 无效", + zap.String("当前空闲连接数", strconv.Itoa(variable.GlmClientHub.Idle))) + } + + response.Success(context, consts.CurdStatusOkMsg, "") +} + // v1 Http-POST 版本; chat 需要不使用 ch 的版本。 // func (r *Rag) Chat(context *gin.Context) { // // 1. query embedding diff --git a/app/http/validator/common/register_validator/web_register_validator.go b/app/http/validator/common/register_validator/web_register_validator.go index ba8a828..9c3c978 100644 --- a/app/http/validator/common/register_validator/web_register_validator.go +++ b/app/http/validator/common/register_validator/web_register_validator.go @@ -89,6 +89,8 @@ func WebRegisterValidator() { // TAG RAG key = consts.ValidatorPrefix + "RagDefaultChat" containers.Set(key, rag.Chat{}) + key = consts.ValidatorPrefix + "RagRelease" + containers.Set(key, rag.Release{}) // TAG Search key = consts.ValidatorPrefix + "SearchAll" diff --git a/app/http/validator/web/rag/release.go b/app/http/validator/web/rag/release.go new file mode 100644 index 0000000..c28b1ec --- /dev/null +++ b/app/http/validator/web/rag/release.go @@ -0,0 +1,27 @@ +package rag + +import ( + "catface/app/global/consts" + "catface/app/http/controller/web" + "catface/app/http/validator/core/data_transfer" + "catface/app/utils/response" + + "github.com/gin-gonic/gin" +) + +type Release struct { + Token string `form:"token" json:"token"` +} + +func (r Release) CheckParams(context *gin.Context) { + if err := context.ShouldBind(&r); err != nil { + response.ValidatorError(context, err) + return + } + extraAddBindDataContext := data_transfer.DataAddContext(r, consts.ValidatorPrefix, context) + if extraAddBindDataContext == nil { + response.ErrorSystem(context, "RAG RELEASE 表单验证器json化失败", "") + } else { + (&web.Rag{}).Release(extraAddBindDataContext) + } +} diff --git a/app/utils/llm_factory/glm_client.go b/app/utils/llm_factory/glm_client.go index 68a2d0f..ba9eac2 100644 --- a/app/utils/llm_factory/glm_client.go +++ b/app/utils/llm_factory/glm_client.go @@ -10,8 +10,8 @@ import ( // INFO 维护 GLM Client 与用户之间的客户端消息队列,也就是在 "github.com/yankeguo/zhipu" 的基础上实现一层封装。 type GlmClientHub struct { - MaxIdle int - MaxActive int + Idle int // 最大连接数 + Active int ApiKey string DefaultModelName string InitPrompt string @@ -27,8 +27,8 @@ type ClientInfo struct { func InitGlmClientHub(maxIdle, maxActive, lifetime int, apiKey, defaultModelName, initPrompt string) *GlmClientHub { hub := &GlmClientHub{ - MaxIdle: maxIdle, - MaxActive: maxActive, + Idle: maxIdle, + Active: maxActive, ApiKey: apiKey, DefaultModelName: defaultModelName, InitPrompt: initPrompt, @@ -58,8 +58,8 @@ func (g *GlmClientHub) GetOneGlmClientInfo(token string, mode int) (clientInfo * } // 空闲数检查 - if g.MaxIdle > 0 { - g.MaxIdle -= 1 + if g.Idle > 0 { + g.Idle -= 1 } else { code = errcode.ErrGlmBusy return @@ -116,7 +116,7 @@ func (g *GlmClientHub) cleanupClients() { for token, info := range g.Clients { if now.Sub(info.LastUsed) > g.LifeTime { delete(g.Clients, token) - g.MaxIdle += 1 + g.Idle += 1 } } } @@ -126,9 +126,13 @@ func (g *GlmClientHub) cleanupClients() { * @param {string} token * @return {*} */ -func (g *GlmClientHub) ReleaseOneGlmClient(token string) { - delete(g.Clients, token) - g.MaxIdle += 1 +func (g *GlmClientHub) ReleaseOneGlmClient(token string) bool { + if _, exists := g.Clients[token]; exists { + delete(g.Clients, token) + g.Idle += 1 + return true + } + return false } // TAG ClientInfo diff --git a/bootstrap/init.go b/bootstrap/init.go index 0078f33..5f20a01 100644 --- a/bootstrap/init.go +++ b/bootstrap/init.go @@ -116,8 +116,8 @@ func init() { // 11. GLM 资源池管理 初始化 variable.GlmClientHub = llm_factory.InitGlmClientHub( - variable.ConfigYml.GetInt("Glm.MaxActive"), variable.ConfigYml.GetInt("Glm.MaxIdle"), + variable.ConfigYml.GetInt("Glm.MaxActive"), variable.ConfigYml.GetInt("Glm.LifeTime"), variable.ConfigYml.GetString("Glm.ApiKey"), variable.ConfigYml.GetString("Glm.DefaultModelName"), diff --git a/routers/web.go b/routers/web.go index ce86326..2b71a9e 100644 --- a/routers/web.go +++ b/routers/web.go @@ -153,6 +153,7 @@ func InitWebRouter() *gin.Engine { rag := backend.Group("rag") { rag.GET("default_talk", validatorFactory.Create(consts.ValidatorPrefix+"RagDefaultChat")) + rag.DELETE("", validatorFactory.Create(consts.ValidatorPrefix+"RagRelease")) } search := backend.Group("search")