Havoc412 3b719c3add feat(model_es): 增加 NLP 生成的 embedding 字段并优化 ES 操作
- 在 Encounter 结构中添加 Embedding 字段,用于存储 NLP 生成的向量
- 修改 InsertDocument 方法,增加 embedding 数据的生成和存储
- 更新 QueryDocumentsMatchAll 方法,调整搜索结果的源字段
- 优化数据绑定逻辑,支持自定义 bind 标签
- 添加 TODO 注释,标记需要进一步解决的问题
2024-11-19 11:27:17 +08:00

191 lines
4.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package model_es
import (
"bytes"
"catface/app/global/consts"
"catface/app/global/variable"
"catface/app/model"
"catface/app/service/nlp"
"catface/app/utils/data_bind"
"catface/app/utils/model_handler"
"context"
"encoding/json"
"fmt"
"github.com/elastic/go-elasticsearch/v8"
"github.com/elastic/go-elasticsearch/v8/esapi"
)
// CreateEncounterESFactory builds the Elasticsearch-side Encounter from a
// model.Encounter, copying Id, Title, Content and TagsList (stored as Tags).
// A nil input yields an empty, non-nil Encounter.
func CreateEncounterESFactory(encounter *model.Encounter) *Encounter {
	doc := new(Encounter)
	if encounter != nil {
		// Field values are bound here, at construction time.
		doc.Id = encounter.Id
		doc.Title = encounter.Title
		doc.Content = encounter.Content
		doc.Tags = encounter.TagsList // TODO (original author's note): no query on this field for now.
	}
	return doc
}
// Encounter holds the encounter data that can be stored as an index document.
type Encounter struct {
Id int64 `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Tags []string `json:"tags"`
// Embedding is filled by InsertDocument from nlp.GetEmbedding before the
// document is serialized.
Embedding []float64 `json:"embedding"`
// TagsHighlight []string `json:"tags_highlight"` // TODO: how to ignore this on insert but still bind it on query.
// TagsHighlight is excluded from JSON encoding (json:"-"); the bind tag is
// presumably what the data_bind helper uses to fill it from query results — verify.
TagsHighlight []string `json:"-" bind:"tags_highlight"` // TODO: how to ignore this on insert but still bind it on query.
}
// IndexName returns the name of the Elasticsearch index used for Encounter
// documents.
func (e *Encounter) IndexName() string {
	const index = "catface_encounters"
	return index
}
// InsertDocument generates an embedding for the encounter and indexes the
// encounter into Elasticsearch, using e.Id as the document ID.
//
// The embedding is requested from nlp.GetEmbedding with the title and content
// and stored on e.Embedding before the document is serialized. The request is
// sent through variable.ElasticClient with Refresh set to "true".
//
// It returns an error when the embedding service reports failure, when the
// document cannot be marshalled, when the request itself fails, or when
// Elasticsearch answers with an error status.
func (e *Encounter) InsertDocument() error {
	ctx := context.Background()

	var ok bool
	if e.Embedding, ok = nlp.GetEmbedding([]string{e.Title, e.Content}); !ok {
		return fmt.Errorf("nlp embedding service error")
	}

	// Serialize the struct to JSON.
	data, err := json.Marshal(e)
	if err != nil {
		return err
	}

	// Build the index request.
	req := esapi.IndexRequest{
		Index:      e.IndexName(),
		DocumentID: fmt.Sprintf("%d", e.Id),
		Body:       bytes.NewReader(data),
		Refresh:    "true",
	}

	// Send the request.
	res, err := req.Do(ctx, variable.ElasticClient)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if !res.IsError() {
		return nil
	}

	var payload map[string]interface{}
	if err := json.NewDecoder(res.Body).Decode(&payload); err != nil {
		return fmt.Errorf("error parsing the response body: %w", err)
	}
	// The "error" member is not guaranteed to be an object, so check the
	// assertion instead of panicking on an unexpected shape.
	if detail, ok := payload["error"].(map[string]interface{}); ok {
		return fmt.Errorf("[%s] %v: %v", res.Status(), detail["type"], detail["reason"])
	}
	return fmt.Errorf("[%s] %v", res.Status(), payload["error"])
}
// UpdateDocument sends a partial update ({"doc": encounter}) for the document
// whose ID is encounter.Id, using the supplied client and Refresh "true".
//
// The receiver is not consulted; the index name, document ID and body all
// come from the encounter argument.
//
// It returns an error when encounter is nil, when the body cannot be
// marshalled, when the request itself fails, or when Elasticsearch answers
// with an error status.
//
// TODO: rework to mirror InsertDocument.
// NOTE(review): a nil encounter.Embedding is serialized as "embedding": null,
// which presumably overwrites the stored vector — confirm and decide whether
// the embedding should be regenerated here.
func (e *Encounter) UpdateDocument(client *elasticsearch.Client, encounter *Encounter) error {
	if encounter == nil {
		return fmt.Errorf("update document: encounter is nil")
	}
	ctx := context.Background()

	// Serialize the partial-update body to JSON.
	data, err := json.Marshal(map[string]interface{}{
		"doc": encounter,
	})
	if err != nil {
		return err
	}

	// Build the update request.
	req := esapi.UpdateRequest{
		Index:      encounter.IndexName(),
		DocumentID: fmt.Sprintf("%d", encounter.Id),
		Body:       bytes.NewReader(data),
		Refresh:    "true",
	}

	// Send the request.
	res, err := req.Do(ctx, client)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if !res.IsError() {
		return nil
	}

	var payload map[string]interface{}
	if err := json.NewDecoder(res.Body).Decode(&payload); err != nil {
		return fmt.Errorf("error parsing the response body: %w", err)
	}
	// The "error" member is not guaranteed to be an object, so check the
	// assertion instead of panicking on an unexpected shape.
	if detail, ok := payload["error"].(map[string]interface{}); ok {
		return fmt.Errorf("[%s] %v: %v", res.Status(), detail["type"], detail["reason"])
	}
	return fmt.Errorf("[%s] %v", res.Status(), payload["error"])
}
// QueryDocumentsMatchAll runs a broad keyword search: a bool/should query
// that matches query against the tags, content and title fields, with
// highlighting on all three.
//
// query is the user-supplied search text; it is JSON-encoded before being
// placed in the request body so quotes or backslashes cannot break or alter
// the query. num is sent as the "size" of the search request.
//
// Each hit is merged with its highlight data and bound onto an Encounter;
// hits that have an unexpected shape or fail to bind are skipped. Only id,
// title, content and tags are requested in _source.
//
// Original author's note: the returned encounters' ids are then perhaps
// handed to MySQL to fetch the full records.
func (e *Encounter) QueryDocumentsMatchAll(query string, num int) ([]Encounter, error) {
	// Encode the search text as a JSON string literal (including the
	// surrounding quotes) instead of splicing it in raw.
	quoted, err := json.Marshal(query)
	if err != nil {
		return nil, err
	}

	body := fmt.Sprintf(`{
	"size": %d,
	"query": {
		"bool": {
			"should": [
				{"match": {"tags": %s}},
				{"match": {"content": %s}},
				{"match": {"title": %s}}
			]
		}
	},
	"highlight": {
		"pre_tags": ["%v"],
		"post_tags": ["%v"],
		"fields": {
			"title": {},
			"content": {
				"fragment_size" : 15
			},
			"tags": {
				"pre_tags": [""],
				"post_tags": [""]
			}
		}
	},
	"_source": ["id", "title", "content", "tags"]
}`, num, quoted, quoted, quoted, consts.PreTags, consts.PostTags)

	hits, err := model_handler.SearchRequest(body, e.IndexName())
	if err != nil {
		return nil, err
	}

	var encounters []Encounter
	for _, hit := range hits {
		fields, ok := hit.(map[string]interface{})
		if !ok {
			// Skip a malformed hit rather than panicking on the assertion.
			continue
		}
		data := model_handler.MergeSouceWithHighlight(fields)

		var encounter Encounter
		if err := data_bind.ShouldBindFormMapToModel(data, &encounter); err != nil {
			// Best effort: a hit that cannot be bound is dropped from the results.
			continue
		}
		encounters = append(encounters, encounter)
	}
	return encounters, nil
}