/*
 * Copyright 2025 coze-dev Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package service

import (
	"context"
	"errors"
	"strconv"
	"sync"
	"time"

	"github.com/bytedance/sonic"
	"github.com/cloudwego/eino/schema"
	"golang.org/x/sync/errgroup"

	knowledgeModel "github.com/coze-dev/coze-studio/backend/api/model/crossdomain/knowledge"
	crossdatacopy "github.com/coze-dev/coze-studio/backend/crossdomain/contract/datacopy"
	"github.com/coze-dev/coze-studio/backend/domain/datacopy"
	copyEntity "github.com/coze-dev/coze-studio/backend/domain/datacopy/entity"
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/entity"
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/consts"
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/convert"
	"github.com/coze-dev/coze-studio/backend/domain/knowledge/internal/dal/model"
	"github.com/coze-dev/coze-studio/backend/infra/contract/document"
	"github.com/coze-dev/coze-studio/backend/infra/contract/document/searchstore"
	"github.com/coze-dev/coze-studio/backend/infra/contract/rdb"
	rdbEntity "github.com/coze-dev/coze-studio/backend/infra/contract/rdb/entity"
	"github.com/coze-dev/coze-studio/backend/pkg/errorx"
	"github.com/coze-dev/coze-studio/backend/pkg/lang/slices"
	"github.com/coze-dev/coze-studio/backend/pkg/logs"
	"github.com/coze-dev/coze-studio/backend/types/errno"
)
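
// CopyKnowledge copies an existing knowledge base into a target space/app.
// It is idempotent on TaskUniqKey: if a copy task with the same key has already
// succeeded, failed, or is still in progress, the recorded result is returned
// instead of starting a new copy.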
func (k *knowledgeSVC) CopyKnowledge(ctx context.Context, request *CopyKnowledgeRequest) (*CopyKnowledgeResponse, error) {
	if request == nil {
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
	}
	if len(request.TaskUniqKey) == 0 {
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "task uniq key is empty"))
	}
	if request.KnowledgeID == 0 {
		return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "knowledge id is empty"))
	}
	kn, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
	if err != nil {
		return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
	}
	if kn == nil || kn.ID == 0 {
		return nil, errorx.New(errno.ErrKnowledgeNotExistCode, errorx.KV("msg", "knowledge not exist"))
	}
	newID, err := k.idgen.GenID(ctx)
	if err != nil {
		return nil, errorx.New(errno.ErrKnowledgeIDGenCode)
	}
	copyTaskEntity := copyEntity.CopyDataTask{
		TaskUniqKey:   request.TaskUniqKey,
		OriginDataID:  request.KnowledgeID,
		TargetDataID:  newID,
		OriginSpaceID: kn.SpaceID,
		TargetSpaceID: request.TargetSpaceID,
		OriginUserID:  kn.CreatorID,
		TargetUserID:  request.TargetUserID,
		OriginAppID:   kn.AppID,
		TargetAppID:   request.TargetAppID,
		DataType:      copyEntity.DataTypeKnowledge,
		StartTime:     time.Now().UnixMilli(),
		FinishTime:    0,
		ExtInfo:       "",
		ErrorMsg:      "",
		Status:        copyEntity.DataCopyTaskStatusCreate,
	}
	checkResult, err := crossdatacopy.DefaultSVC().CheckAndGenCopyTask(ctx, &datacopy.CheckAndGenCopyTaskReq{Task: &copyTaskEntity})
	if err != nil {
		return nil, errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
	}
	switch checkResult.CopyTaskStatus {
	case copyEntity.DataCopyTaskStatusSuccess:
		return &CopyKnowledgeResponse{
			OriginKnowledgeID: request.KnowledgeID,
			TargetKnowledgeID: checkResult.TargetID,
			CopyStatus:        knowledgeModel.CopyStatus_Successful,
			ErrMsg:            "",
		}, nil
	case copyEntity.DataCopyTaskStatusInProgress:
		return &CopyKnowledgeResponse{
			OriginKnowledgeID: request.KnowledgeID,
			TargetKnowledgeID: checkResult.TargetID,
			CopyStatus:        knowledgeModel.CopyStatus_Processing,
			ErrMsg:            "",
		}, nil
	case copyEntity.DataCopyTaskStatusFail:
		return &CopyKnowledgeResponse{
			OriginKnowledgeID: request.KnowledgeID,
			TargetKnowledgeID: checkResult.TargetID,
			CopyStatus:        knowledgeModel.CopyStatus_Failed,
			ErrMsg:            checkResult.FailReason,
		}, nil
	}
	copyResp, err := k.copyDo(ctx, &knowledgeCopyCtx{
		OriginData: kn,
		CopyTask:   &copyTaskEntity,
	})
	if err != nil {
		return nil, err
	}
	return copyResp, nil
}
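
// copyDo drives a single copy task: it marks the task as in progress, copies the
// knowledge record and its documents, then marks the task successful. On failure
// (including panics) the deferred handler deletes the partially copied knowledge,
// drops any RDB tables created along the way, and marks the task failed.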
func (k *knowledgeSVC) copyDo(ctx context.Context, copyCtx *knowledgeCopyCtx) (resp *CopyKnowledgeResponse, err error) {
	defer func() {
		if e := recover(); e != nil {
			logs.CtxErrorf(ctx, "copy knowledge failed, err: %v", e)
			err = errorx.New(errno.ErrKnowledgeSystemCode, errorx.KVf("msg", "panic: %v", e))
		}
		if err != nil {
			deleteErr := k.DeleteKnowledge(ctx, &DeleteKnowledgeRequest{KnowledgeID: copyCtx.CopyTask.TargetDataID})
			if deleteErr != nil {
				logs.CtxErrorf(ctx, "delete knowledge failed, err: %v", deleteErr)
			}
			if len(copyCtx.NewRDBTableNames) != 0 {
				for i := range copyCtx.NewRDBTableNames {
					_, dropErr := k.rdb.DropTable(ctx, &rdb.DropTableRequest{
						TableName: copyCtx.NewRDBTableNames[i],
						IfExists:  true,
					})
					if dropErr != nil {
						logs.CtxErrorf(ctx, "[copyDo] drop table failed, err: %v", dropErr)
					}
				}
			}
			copyCtx.CopyTask.Status = copyEntity.DataCopyTaskStatusFail
			if updateErr := crossdatacopy.DefaultSVC().UpdateCopyTask(ctx, &datacopy.UpdateCopyTaskReq{Task: copyCtx.CopyTask}); updateErr != nil {
				logs.CtxErrorf(ctx, "update copy task failed, err: %v", updateErr)
			}
		}
	}()
	copyCtx.CopyTask.Status = copyEntity.DataCopyTaskStatusInProgress
	err = crossdatacopy.DefaultSVC().UpdateCopyTask(ctx, &datacopy.UpdateCopyTaskReq{Task: copyCtx.CopyTask})
	if err != nil {
		return nil, errorx.New(errno.ErrKnowledgeCrossDomainCode, errorx.KV("msg", err.Error()))
	}
	err = k.copyKnowledge(ctx, copyCtx)
	if err != nil {
		logs.CtxErrorf(ctx, "copy knowledge failed, err: %v", err)
		return nil, err
	}
	err = k.copyKnowledgeDocuments(ctx, copyCtx)
	if err != nil {
		return nil, err
	}
	copyCtx.CopyTask.FinishTime = time.Now().UnixMilli()
	copyCtx.CopyTask.Status = copyEntity.DataCopyTaskStatusSuccess
	if updateErr := crossdatacopy.DefaultSVC().UpdateCopyTask(ctx, &datacopy.UpdateCopyTaskReq{Task: copyCtx.CopyTask}); updateErr != nil {
		logs.CtxWarnf(ctx, "update copy task failed, err: %v", updateErr)
	}
	return &CopyKnowledgeResponse{
		OriginKnowledgeID: copyCtx.OriginData.ID,
		TargetKnowledgeID: copyCtx.CopyTask.TargetDataID,
		CopyStatus:        knowledgeModel.CopyStatus_Successful,
		ErrMsg:            "",
	}, nil
}
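
// copyKnowledge writes the copied knowledge record itself, reusing the source
// name, description, icon and format type, while pointing it at the target
// space, app and creator recorded on the copy task.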
func (k *knowledgeSVC) copyKnowledge(ctx context.Context, copyCtx *knowledgeCopyCtx) error {
	copyKnowledgeInfo := model.Knowledge{
		ID:          copyCtx.CopyTask.TargetDataID,
		Name:        copyCtx.OriginData.Name,
		AppID:       copyCtx.CopyTask.TargetAppID,
		CreatorID:   copyCtx.CopyTask.TargetUserID,
		SpaceID:     copyCtx.CopyTask.TargetSpaceID,
		CreatedAt:   time.Now().UnixMilli(),
		UpdatedAt:   time.Now().UnixMilli(),
		Status:      int32(knowledgeModel.KnowledgeStatusEnable),
		Description: copyCtx.OriginData.Description,
		IconURI:     copyCtx.OriginData.IconURI,
		FormatType:  copyCtx.OriginData.FormatType,
	}
	err := k.knowledgeRepo.Upsert(ctx, &copyKnowledgeInfo)
	if err != nil {
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
	}
	return nil
}
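
// copyKnowledgeDocuments copies all processable documents of the source
// knowledge base: it creates the search-store collection for the target
// knowledge, removes any documents already present under the target ID,
// and then copies the source documents concurrently.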
func (k *knowledgeSVC) copyKnowledgeDocuments(ctx context.Context, copyCtx *knowledgeCopyCtx) (err error) {
	// Query document information (only processed documents)
	documents, _, err := k.documentRepo.FindDocumentByCondition(ctx, &entity.WhereDocumentOpt{
		KnowledgeIDs: []int64{copyCtx.OriginData.ID},
		StatusIn:     []int32{int32(entity.DocumentStatusEnable), int32(entity.DocumentStatusInit)},
		SelectAll:    true,
	})
	if err != nil {
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
	}
	if len(documents) == 0 {
		logs.CtxInfof(ctx, "knowledge %d has no document", copyCtx.OriginData.ID)
		return nil
	}
	// Create search store collections for the target knowledge
	docItem, err := k.fromModelDocument(ctx, documents[0])
	if err != nil {
		return err
	}
	fields, err := k.mapSearchFields(docItem)
	if err != nil {
		return err
	}
	collectionName := getCollectionName(copyCtx.CopyTask.TargetDataID)
	for _, ssMgr := range k.searchStoreManagers {
		if err = ssMgr.Create(ctx, &searchstore.CreateRequest{
			CollectionName: collectionName,
			Fields:         fields,
			CollectionMeta: nil,
		}); err != nil {
			return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", err.Error()))
		}
	}
	targetDocuments, _, err := k.documentRepo.FindDocumentByCondition(ctx, &entity.WhereDocumentOpt{
		KnowledgeIDs: []int64{copyCtx.CopyTask.TargetDataID},
		SelectAll:    true,
	})
	if err != nil {
		logs.CtxErrorf(ctx, "find target document failed, err: %v", err)
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
	}
	for i := range targetDocuments {
		err = k.DeleteDocument(ctx, &DeleteDocumentRequest{DocumentID: targetDocuments[i].ID})
		if err != nil {
			return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
		}
	}
	// Copy documents concurrently, at most 10 in flight
	eg := errgroup.Group{}
	eg.SetLimit(10)
	mu := sync.Mutex{}
	var failList []int64
	newIDs, err := k.genMultiIDs(ctx, len(documents))
	if err != nil {
		logs.CtxErrorf(ctx, "gen document id failed, err: %v", err)
		return errorx.New(errno.ErrKnowledgeIDGenCode)
	}
	for i := range documents {
		doc := documents[i]
		newID := newIDs[i]
		eg.Go(func() error {
			cpErr := k.copyDocument(ctx, copyCtx, doc, newID)
			if cpErr != nil {
				mu.Lock()
				failList = append(failList, doc.ID)
				mu.Unlock()
				logs.CtxErrorf(ctx, "copy document failed, src document id: %d, new id: %d, err: %v", doc.ID, newID, cpErr)
				return cpErr
			}
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		logs.CtxErrorf(ctx, "copy document failed, document ids: %v, first-err: %v", failList, err)
		return errorx.New(errno.ErrKnowledgeCopyFailCode, errorx.KV("msg", err.Error()))
	}
	return nil
}
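
// copyDocument copies a single document into the target knowledge base. For
// table-type documents it creates a new physical table and remaps column IDs;
// it then copies slices in batches of 100, writes them to the search stores,
// and persists the new slice records. The new document is created in chunking
// status and flipped to enabled (or failed) by the deferred status update.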
func (k *knowledgeSVC) copyDocument(ctx context.Context, copyCtx *knowledgeCopyCtx, doc *model.KnowledgeDocument, newDocID int64) (err error) {
	// Build the copied document record, pointing at the target knowledge base
	newDoc := model.KnowledgeDocument{
		ID:            newDocID,
		KnowledgeID:   copyCtx.CopyTask.TargetDataID,
		Name:          doc.Name,
		FileExtension: doc.FileExtension,
		DocumentType:  doc.DocumentType,
		URI:           doc.URI,
		Size:          doc.Size,
		SliceCount:    doc.SliceCount,
		CharCount:     doc.CharCount,
		CreatorID:     copyCtx.CopyTask.TargetUserID,
		SpaceID:       copyCtx.CopyTask.TargetSpaceID,
		CreatedAt:     time.Now().UnixMilli(),
		UpdatedAt:     time.Now().UnixMilli(),
		SourceType:    doc.SourceType,
		Status:        int32(entity.DocumentStatusChunking),
		FailReason:    "",
		ParseRule:     doc.ParseRule,
	}
	columnMap := map[int64]int64{}
	// If it is a tabular knowledge base -> create a new physical table
	if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
		if doc.TableInfo != nil {
			newTableInfo := entity.TableInfo{}
			data, err := sonic.Marshal(doc.TableInfo)
			if err != nil {
				return err
			}
			err = sonic.Unmarshal(data, &newTableInfo)
			if err != nil {
				return err
			}
			newDoc.TableInfo = &newTableInfo
		}
		err = k.createTable(ctx, &newDoc)
		if err != nil {
			return err
		}
		newColumnName2IDMap := map[string]int64{}
		for i := range newDoc.TableInfo.Columns {
			newColumnName2IDMap[newDoc.TableInfo.Columns[i].Name] = newDoc.TableInfo.Columns[i].ID
		}
		oldColumnName2IDMap := map[string]int64{}
		for i := range doc.TableInfo.Columns {
			oldColumnName2IDMap[doc.TableInfo.Columns[i].Name] = doc.TableInfo.Columns[i].ID
			newDoc.TableInfo.Columns[i].ID = newColumnName2IDMap[doc.TableInfo.Columns[i].Name]
		}
		for i := range doc.TableInfo.Columns {
			columnMap[oldColumnName2IDMap[doc.TableInfo.Columns[i].Name]] = newDoc.TableInfo.Columns[i].ID
		}
		// copyDocument runs concurrently for multiple documents, so guard the shared slice
		copyCtx.mu.Lock()
		copyCtx.NewRDBTableNames = append(copyCtx.NewRDBTableNames, newDoc.TableInfo.PhysicalTableName)
		copyCtx.mu.Unlock()
	}
	docEntity, err := k.fromModelDocument(ctx, &newDoc)
	if err != nil {
		return err
	}
	fields, err := k.mapSearchFields(docEntity)
	if err != nil {
		logs.CtxErrorf(ctx, "map search fields failed, err: %v", err)
		return errorx.New(errno.ErrKnowledgeSystemCode, errorx.KV("msg", err.Error()))
	}
	indexingFields := getIndexingFields(fields)
	collectionName := getCollectionName(newDoc.KnowledgeID)
	sliceIDs, err := k.sliceRepo.GetDocumentSliceIDs(ctx, []int64{doc.ID})
	if err != nil {
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
	}
	err = k.documentRepo.Create(ctx, &newDoc)
	if err != nil {
		return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
	}
	defer func() {
		status := int32(entity.DocumentStatusEnable)
		msg := ""
		if err != nil {
			status = int32(entity.DocumentStatusFailed)
			msg = err.Error()
		}
		updateErr := k.documentRepo.SetStatus(ctx, newDoc.ID, status, msg)
		if updateErr != nil {
			logs.CtxErrorf(ctx, "update document status failed, err: %v", updateErr)
		}
	}()
	batchSize := 100
	for i := 0; i < len(sliceIDs); i += batchSize {
		end := i + batchSize
		if end > len(sliceIDs) {
			end = len(sliceIDs)
		}
		sliceInfo, err := k.sliceRepo.MGetSlices(ctx, sliceIDs[i:end])
		if err != nil {
			return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
		}
		newSliceModels := make([]*model.KnowledgeDocumentSlice, 0)
		newSliceIDs, err := k.genMultiIDs(ctx, len(sliceInfo))
		if err != nil {
			return errorx.New(errno.ErrKnowledgeIDGenCode, errorx.KV("msg", err.Error()))
		}
		old2NewIDMap := map[int64]int64{}
		newMap := map[int64]*model.KnowledgeDocumentSlice{}
		for t := range sliceInfo {
			old2NewIDMap[sliceInfo[t].ID] = newSliceIDs[t]
			newSliceModel := model.KnowledgeDocumentSlice{
				ID:          old2NewIDMap[sliceInfo[t].ID],
				KnowledgeID: copyCtx.CopyTask.TargetDataID,
				DocumentID:  newDocID,
				Content:     sliceInfo[t].Content,
				Sequence:    sliceInfo[t].Sequence,
				CreatedAt:   time.Now().UnixMilli(),
				UpdatedAt:   time.Now().UnixMilli(),
				CreatorID:   copyCtx.CopyTask.TargetUserID,
				SpaceID:     copyCtx.CopyTask.TargetSpaceID,
				Status:      int32(model.SliceStatusDone),
				FailReason:  "",
				Hit:         0,
			}
			newMap[newSliceIDs[t]] = &newSliceModel
		}
		var sliceEntities []*entity.Slice
		if doc.DocumentType == int32(knowledgeModel.DocumentTypeTable) {
			sliceMap, err := k.selectTableData(ctx, doc.TableInfo, sliceInfo)
			if err != nil {
				logs.CtxErrorf(ctx, "select table data failed, err: %v", err)
				return err
			}
			newSlices := make([]*entity.Slice, 0)
			for id, info := range sliceMap {
				info.DocumentID = newDocID
				info.Hit = 0
				info.DocumentName = doc.Name
				info.ID = old2NewIDMap[id]
				for t := range info.RawContent[0].Table.Columns {
					info.RawContent[0].Table.Columns[t].ColumnID = columnMap[info.RawContent[0].Table.Columns[t].ColumnID]
				}
				newSlices = append(newSlices, info)
			}
			err = k.upsertDataToTable(ctx, newDoc.TableInfo, newSlices)
			if err != nil {
				logs.CtxErrorf(ctx, "upsert data to table failed, err: %v", err)
				return err
			}
			sliceEntities = newSlices
		}
		for _, v := range newMap {
			cpSlice := v
			newSliceModels = append(newSliceModels, cpSlice)
			if doc.DocumentType != int32(knowledgeModel.DocumentTypeTable) {
				sliceEntities = append(sliceEntities, k.fromModelSlice(ctx, cpSlice))
			}
		}
		ssDocs, err := slices.TransformWithErrorCheck(sliceEntities, func(a *entity.Slice) (*schema.Document, error) {
			return k.slice2Document(ctx, docEntity, a)
		})
		if err != nil {
			return err
		}
		for _, mgr := range k.searchStoreManagers {
			ss, err := mgr.GetSearchStore(ctx, collectionName)
			if err != nil {
				return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", err.Error()))
			}
			if _, err = ss.Store(ctx, ssDocs,
				searchstore.WithIndexerPartitionKey(fieldNameDocumentID),
				searchstore.WithPartition(strconv.FormatInt(newDoc.ID, 10)),
				searchstore.WithIndexingFields(indexingFields),
			); err != nil {
				return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", err.Error()))
			}
		}
		err = k.sliceRepo.BatchCreate(ctx, newSliceModels)
		if err != nil {
			return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
		}
	}
	return nil
}
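
// createTable provisions the physical RDB table backing a tabular document:
// it assigns fresh column IDs, appends the reserved primary-key column, and
// records the resulting physical table name on the document's TableInfo.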
func (k *knowledgeSVC) createTable(ctx context.Context, doc *model.KnowledgeDocument) error {
	// For a tabular knowledge base, create the backing physical table
	rdbColumns := []*rdbEntity.Column{}
	tableColumns := doc.TableInfo.Columns
	columnIDs, err := k.genMultiIDs(ctx, len(tableColumns)+1)
	if err != nil {
		return err
	}
	for i := range tableColumns {
		tableColumns[i].ID = columnIDs[i]
		rdbColumns = append(rdbColumns, &rdbEntity.Column{
			Name:     convert.ColumnIDToRDBField(columnIDs[i]),
			DataType: convert.ConvertColumnType(tableColumns[i].Type),
			NotNull:  tableColumns[i].Indexing,
		})
	}
	// Add a primary key ID column to each table
	doc.TableInfo.Columns = append(doc.TableInfo.Columns, &entity.TableColumn{
		ID:          columnIDs[len(columnIDs)-1],
		Name:        consts.RDBFieldID,
		Type:        document.TableColumnTypeInteger,
		Description: "主键ID",
		Indexing:    false,
		Sequence:    -1,
	})
	rdbColumns = append(rdbColumns, &rdbEntity.Column{
		Name:     consts.RDBFieldID,
		DataType: rdbEntity.TypeBigInt,
		NotNull:  true,
	})
	// Create the data table
	resp, err := k.rdb.CreateTable(ctx, &rdb.CreateTableRequest{
		Table: &rdbEntity.Table{
			Columns: rdbColumns,
			Indexes: []*rdbEntity.Index{
				{
					Name:    "pk",
					Type:    rdbEntity.PrimaryKey,
					Columns: []string{consts.RDBFieldID},
				},
			},
		},
	})
	if err != nil {
		logs.CtxErrorf(ctx, "create table failed, err: %v", err)
		return err
	}
	doc.TableInfo = &entity.TableInfo{
		VirtualTableName:  doc.Name,
		PhysicalTableName: resp.Table.Name,
		TableDesc:         doc.TableInfo.TableDesc,
		Columns:           doc.TableInfo.Columns,
	}
	return nil
}
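
// knowledgeCopyCtx carries the state of one knowledge copy operation, including
// the RDB tables created so far so that they can be dropped on failure.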
type knowledgeCopyCtx struct {
	OriginData *model.Knowledge
	CopyTask   *copyEntity.CopyDataTask

	mu               sync.Mutex // guards NewRDBTableNames, which concurrent copyDocument calls append to
	NewRDBTableNames []string
}
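
// MoveKnowledgeToLibrary detaches a knowledge base from its app (AppID = 0),
// moving it from the app scope into the space library.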
func (k *knowledgeSVC) MoveKnowledgeToLibrary(ctx context.Context, request *MoveKnowledgeToLibraryRequest) error {
	if request == nil || request.KnowledgeID == 0 {
		return errors.New("invalid request")
	}
	kn, err := k.knowledgeRepo.GetByID(ctx, request.KnowledgeID)
	if err != nil {
		return err
	}
	if kn == nil || kn.ID == 0 {
		return errors.New("knowledge not found")
	}
	kn.AppID = 0
	err = k.knowledgeRepo.Update(ctx, kn)
	return err
}