🆕 doc upload API # pre for RAG

This commit is contained in:
Havoc412 2024-11-15 15:05:54 +08:00
parent 55cf9e77e4
commit 1156b44466
10 changed files with 177 additions and 3 deletions

View File

@ -0,0 +1,28 @@
package web
import (
"catface/app/global/consts"
"catface/app/global/variable"
"catface/app/model"
"catface/app/utils/response"
"path/filepath"
"github.com/gin-gonic/gin"
)
// Docs is the web controller for document endpoints.
// It carries no state; its methods are invoked on a fresh value per request.
type Docs struct {
}
// Upload handles a document upload request.
//
// It reads the "path" value stored in the gin context under the validator
// prefix, joins it onto the configured upload root and docs sub-directory,
// and then asks the Doc model to persist the record bound from the request.
// A success or failure response is written to the context accordingly.
func (d *Docs) Upload(context *gin.Context) {
	// TODO 1. Read the source file and call the Python API to upload it in chunks.
	relPath := context.GetString(consts.ValidatorPrefix + "path")
	filePath := filepath.Join(
		variable.ConfigYml.GetString("FileUploadSetting.UploadFileSavePath"),
		variable.ConfigYml.GetString("FileUploadSetting.DocsRootPath"),
		relPath,
	)
	_ = filePath // Not consumed yet; reserved for the chunked-upload step in the TODO above.

	// STAGE 2. Store the document record.
	if !model.CreateDocFactory("").InsertDocumentData(context) {
		response.Fail(context, consts.CurdCreatFailCode, consts.CurdCreatFailMsg, "上传文档错误")
		return
	}
	response.Success(context, consts.CurdStatusOkMsg, "")
}

View File

@ -7,6 +7,7 @@ import (
"catface/app/http/validator/common/websocket"
"catface/app/http/validator/web/animal"
"catface/app/http/validator/web/animal_like"
"catface/app/http/validator/web/doc"
"catface/app/http/validator/web/encounter"
"catface/app/http/validator/web/encounter_like"
"catface/app/http/validator/web/knowledge"
@ -91,4 +92,8 @@ func WebRegisterValidator() {
// TAG Knowledge
key = consts.ValidatorPrefix + "KnowledgeRandomList"
containers.Set(key, knowledge.Random{})
// TAG Doc
key = consts.ValidatorPrefix + "DocUpload"
containers.Set(key, doc.Upload{})
}

View File

@ -33,7 +33,6 @@ func (u UpFiles) CheckParams(context *gin.Context) {
// 2. File 内容的验证;
tmpFile, err := context.FormFile(variable.ConfigYml.GetString("FileUploadSetting.UploadFileField")) // file 是一个文件结构体(文件对象)
var isPass bool
//获取文件发生错误,可能上传了空文件等
if err != nil {
response.Fail(context, consts.FilesUploadFailCode, consts.FilesUploadFailMsg, err.Error())
@ -51,8 +50,10 @@ func (u UpFiles) CheckParams(context *gin.Context) {
return
}
//不允许的文件mime类型
var isPass bool
var mimeType string
if fp, err := tmpFile.Open(); err == nil {
mimeType := files.GetFilesMimeByFp(fp)
mimeType = files.GetFilesMimeByFp(fp)
for _, value := range variable.ConfigYml.GetStringSlice("FileUploadSetting.AllowMimeType") {
if strings.ReplaceAll(value, " ", "") == strings.ReplaceAll(mimeType, " ", "") {
@ -67,7 +68,9 @@ func (u UpFiles) CheckParams(context *gin.Context) {
}
//凡是存在相等的类型,通过验证,调用控制器
if !isPass {
response.Fail(context, consts.FilesUploadMimeTypeFailCode, consts.FilesUploadMimeTypeFailMsg, "")
response.Fail(context, consts.FilesUploadMimeTypeFailCode, consts.FilesUploadMimeTypeFailMsg, gin.H{
"mime_type": mimeType,
})
} else {
(&web.Upload{}).StartUpload(context)
}

View File

@ -0,0 +1,31 @@
package doc
import (
"catface/app/global/consts"
"catface/app/http/controller/web"
"catface/app/http/validator/core/data_transfer"
"catface/app/utils/response"
"github.com/gin-gonic/gin"
)
// Upload is the request validator for the document upload endpoint.
// Both fields are mandatory (binding:"required") and may arrive as form
// values or JSON.
type Upload struct {
	Name string `form:"name" json:"name" binding:"required"`
	Path string `form:"path" json:"path" binding:"required"`
}
// CheckParams validates the upload request and, when validation succeeds,
// forwards the request to the Docs controller.
//
// On a binding error it writes a validator error response; if the validated
// fields cannot be attached to the context it writes a system error response.
func (d Upload) CheckParams(context *gin.Context) {
	// Basic parameter validation.
	if err := context.ShouldBind(&d); err != nil {
		response.ValidatorError(context, err)
		return
	}

	boundCtx := data_transfer.DataAddContext(d, consts.ValidatorPrefix, context)
	if boundCtx == nil {
		response.ErrorSystem(context, "upload doc 表单验证器json化失败", "")
		return
	}

	// Validation finished: call the controller and hand it the context that
	// carries the validator's fields, keeping the request data consistent.
	(&web.Docs{}).Upload(boundCtx)
}

37
app/model/doc.go Normal file
View File

@ -0,0 +1,37 @@
package model
import (
"catface/app/global/variable"
"catface/app/utils/data_bind"
"github.com/gin-gonic/gin"
"go.uber.org/zap"
)
// INFO @brief This model exists to manage knowledge-base files at a high level.

// CreateDocFactory returns a Doc whose embedded BaseModel holds the database
// connection obtained from UseDbConn(sqlType).
func CreateDocFactory(sqlType string) *Doc {
	base := BaseModel{DB: UseDbConn(sqlType)}
	return &Doc{BaseModel: base}
}
// Doc is the database model for one knowledge-base document record.
// TableName maps it to the "docs" table.
//
// The column names are declared with GORM's `column:` tag setting. A bare
// `gorm:"name"` is not a recognised setting and is silently ignored, so the
// explicit form states the intended mapping without changing the resulting
// column names.
type Doc struct {
	BaseModel
	// Name keeps the file name as originally supplied. Per the original
	// author's note, the actual stored ("real") name still relies on
	// Snow + MD5 to avoid collisions.
	Name string `gorm:"column:name" json:"name"`
	// Path is the document path as submitted with the upload request.
	Path string `gorm:"column:path" json:"path"`
}
// TableName returns the name of the database table backing Doc.
func (d *Doc) TableName() string {
	const table = "docs"
	return table
}
// InsertDocumentData binds the request's form data onto a new Doc and
// inserts it through the receiver's database handle.
//
// It returns true when both binding and insertion succeed. On either
// failure the error is logged via ZapLog and false is returned.
func (d *Doc) InsertDocumentData(c *gin.Context) bool {
	var record Doc

	if err := data_bind.ShouldBindFormDataToModel(c, &record); err != nil {
		variable.ZapLog.Error("Doc 数据绑定出错", zap.Error(err))
		return false
	}

	if res := d.Create(&record); res.Error != nil {
		variable.ZapLog.Error("Doc 数据新增出错", zap.Error(res.Error))
		return false
	}

	return true
}

20
app/model_es/doc.go Normal file
View File

@ -0,0 +1,20 @@
package model_es
// INFO @brief This file centralises handling of document feature vectors stored in ES.

// CreateDocESFactory returns a pointer to a zero-valued Doc.
func CreateDocESFactory() *Doc {
	return new(Doc)
}
// Doc is the document shape used for the ES index: a piece of text content
// together with its embedding vector.
type Doc struct {
	Content   string    `json:"content"`
	Embedding []float64 `json:"embedding"`
}

// IndexName returns the name of the index that Doc values belong to.
func (d *Doc) IndexName() string {
	const index = "catface_docs"
	return index
}

// InsertDocument is currently a stub: it performs no work and always
// reports success by returning nil.
func (d *Doc) InsertDocument() error {
	var err error
	return err
}

View File

@ -0,0 +1,22 @@
package curd
import (
"catface/app/model"
"github.com/gin-gonic/gin"
)
// CreateDocCurdFactory returns a DocCurd backed by a Doc model created with
// an empty sqlType argument.
func CreateDocCurdFactory() *DocCurd {
	curd := new(DocCurd)
	curd.doc = model.CreateDocFactory("")
	return curd
}
// DocCurd is the service-layer wrapper around the Doc database model.
type DocCurd struct {
	doc *model.Doc // underlying model that performs the database work
}
// UPDATE: this wrapper may turn out to be unnecessary.

// InsertDocumentData stores the document described by the request by
// delegating to the underlying Doc model, and returns that call's result.
func (d *DocCurd) InsertDocumentData(c *gin.Context) bool {
	// TODO insert embedding to ES // INFO call the Python service?

	// STAGE insert data to mysql
	ok := d.doc.InsertDocumentData(c)
	return ok
}

View File

@ -62,6 +62,7 @@ FileUploadSetting:
UploadFileSavePath: "D:/.File Data/GoProject/catface_back/public/nginx" # 上传文件保存在路径, 该路径与 BasePath 进行拼接使用
UploadFileReturnPath: "" # 文件上传后返回的路径;由于程序会自动创建软连接、自动将资源定位到实际路径,所有资源的访问入口建议都从 public 开始
AllowMimeType: #允许的文件mime类型列表
# TAG 图像 / 视频
- "image/jpeg" #jpg、jpeg图片格式
- "image/png" #png图片格式
# - "image/x-icon" #ico图片
@ -71,7 +72,13 @@ FileUploadSetting:
- "text/plain; charset=utf-8" #txt log json等文本文件
# - "video/mp4" #视频文件例如mp4
# - "audio/mpeg" #音频文件,例如: mp3
# TAG 文档
- "application/msword" # .doc
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document" # .docx
- "application/pdf" # .pdf
- "application/octet-stream" # 适配 APIfox 的调用
AvatarWidth: 200
DocsRootPath: "docs" # TODO 或许 upload 模块可以写一下自动区分大致的文件类型所在的位置。
# casbin 权限控制api接口
Casbin:

View File

@ -159,6 +159,11 @@ func InitWebRouter() *gin.Engine {
{
knowledge.GET("random", validatorFactory.Create(consts.ValidatorPrefix+"KnowledgeRandomList"))
}
doc := backend.Group("doc")
{
doc.POST("", validatorFactory.Create(consts.ValidatorPrefix+"DocUpload"))
}
// }
}

16
test/model_2_test.go Normal file
View File

@ -0,0 +1,16 @@
package test
import (
"catface/app/model"
"testing"
)
// TestDocModel runs the shared test Init and then auto-migrates the Doc
// model, reporting any migration error as a test failure.
func TestDocModel(t *testing.T) {
	Init()

	if err := DB.AutoMigrate(&model.Doc{}); err != nil {
		t.Error(err)
	}
}