From 3b719c3addafbbcfdee3bef09b3c49ba37036d3b Mon Sep 17 00:00:00 2001 From: Havoc412 <2993167370@qq.com> Date: Tue, 19 Nov 2024 11:27:17 +0800 Subject: [PATCH] =?UTF-8?q?feat(model=5Fes):=20=E5=A2=9E=E5=8A=A0=20NLP=20?= =?UTF-8?q?=E7=94=9F=E6=88=90=E7=9A=84=20embedding=20=E5=AD=97=E6=AE=B5?= =?UTF-8?q?=E5=B9=B6=E4=BC=98=E5=8C=96=20ES=20=E6=93=8D=E4=BD=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 Encounter 结构中添加 Embedding 字段,用于存储 NLP 生成的向量 - 修改 InsertDocument 方法,增加 embedding 数据的生成和存储 - 更新 QueryDocumentsMatchAll 方法,调整搜索结果的源字段 - 优化数据绑定逻辑,支持自定义 bind 标签 - 添加 TODO 注释,标记需要进一步解决的问题 --- .../controller/web/encounter_controller.go | 2 +- app/model_es/doc.go | 2 +- app/model_es/encounter.go | 21 +++++++++++++------ app/utils/data_bind/formdata_to_model.go | 3 +++ 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/app/http/controller/web/encounter_controller.go b/app/http/controller/web/encounter_controller.go index 2f5b130..1b1928c 100644 --- a/app/http/controller/web/encounter_controller.go +++ b/app/http/controller/web/encounter_controller.go @@ -73,7 +73,7 @@ func (e *Encounters) Create(context *gin.Context) { // 2: EA Links; // TIP 感觉直接使用 go 会直接且清晰。 go model.CreateEncounterAnimalLinkFactory("").Insert(encounter.Id, animals_id) - // 3. ES speed + // 3. ES speed // TODO 这里如何实现 不同 DB 之间的 “事务” 概念。 if level := int(context.GetFloat64(consts.ValidatorPrefix + "level")); level > 1 { go model_es.CreateEncounterESFactory(&encounter).InsertDocument() } diff --git a/app/model_es/doc.go b/app/model_es/doc.go index 4f92693..6e86e0d 100644 --- a/app/model_es/doc.go +++ b/app/model_es/doc.go @@ -45,7 +45,7 @@ func (d *Doc) TopK(embedding []float64, k int) ([]Doc, error) { } } }, - "_source": ["content"] + "_source": ["id", "content"] }`, k, string(paramsJSON)) hits, err := model_handler.SearchRequest(body, d.IndexName()) diff --git a/app/model_es/encounter.go b/app/model_es/encounter.go index 4330a0d..d5d1187 100644 --- a/app/model_es/encounter.go +++ b/app/model_es/encounter.go @@ -5,6 +5,7 @@ import ( "catface/app/global/consts" "catface/app/global/variable" "catface/app/model" + "catface/app/service/nlp" "catface/app/utils/data_bind" "catface/app/utils/model_handler" "context" @@ -31,12 +32,14 @@ func CreateEncounterESFactory(encounter *model.Encounter) *Encounter { // INFO 存储能够作为索引存在的数据。 type Encounter struct { - Id int64 `json:"id"` - Title string `json:"title"` - Content string `json:"content"` - Tags []string `json:"tags"` + Id int64 `json:"id"` + Title string `json:"title"` + Content string `json:"content"` + Tags []string `json:"tags"` + Embedding []float64 `json:"embedding"` - TagsHighlight []string `json:"tags_highlight"` + // TagsHighlight []string `json:"tags_highlight"` // TODO 如何 insert 时忽略,query 时绑定。 + TagsHighlight []string `json:"-" bind:"tags_highlight"` // TODO 如何 insert 时忽略,query 时绑定。 } func (e *Encounter) IndexName() string { @@ -46,6 +49,11 @@ func (e *Encounter) IndexName() string { func (e *Encounter) InsertDocument() error { ctx := context.Background() + var ok bool + if e.Embedding, ok = nlp.GetEmbedding([]string{e.Title, e.Content}); !ok { + return fmt.Errorf("nlp embedding service error") + } + // 将结构体转换为 JSON 字符串 data, err := json.Marshal(e) if err != nil { @@ -157,7 +165,8 @@ func (e *Encounter) QueryDocumentsMatchAll(query string, num int) ([]Encounter, "post_tags": [""] } } - } + }, + "_source": ["id", "title", "content", "tags"] }`, num, query, query, query, consts.PreTags, consts.PostTags) hits, err := model_handler.SearchRequest(body, e.IndexName()) diff --git a/app/utils/data_bind/formdata_to_model.go b/app/utils/data_bind/formdata_to_model.go index 42f3e3e..90311c8 100644 --- a/app/utils/data_bind/formdata_to_model.go +++ b/app/utils/data_bind/formdata_to_model.go @@ -109,6 +109,9 @@ func ShouldBindFormMapToModel(m map[string]interface{}, modelStruct interface{}) func fieldSetValueByMap(m map[string]interface{}, valueOf reflect.Value, typeOf reflect.Type, colIndex int) { relaKey := typeOf.Field(colIndex).Tag.Get("json") + if relaKey == "-" { // TIP 增加新的 tag bind,实现自定义的绑定,和原本的 json 区分。 + relaKey = typeOf.Field(colIndex).Tag.Get("bind") + } if relaKey != "-" && m[relaKey] != nil { switch typeOf.Field(colIndex).Type.Kind() { case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64: