【新增】AI 知识库:文档切片向量化入库

This commit is contained in:
xiaoxin 2024-08-15 15:57:03 +08:00
parent b4af042c64
commit 8e54eef8af
14 changed files with 190 additions and 34 deletions

View File

@ -0,0 +1,39 @@
package cn.iocoder.yudao.module.ai.enums.knowledge;
import cn.iocoder.yudao.framework.common.core.IntArrayValuable;
import lombok.AllArgsConstructor;
import lombok.Getter;
import java.util.Arrays;
/**
 * Status values for an AI knowledge-base document (indexing in progress, usable, failed).
 *
 * @author xiaoxin
 */
@Getter
@AllArgsConstructor
public enum AiKnowledgeDocumentStatusEnum implements IntArrayValuable {

    IN_PROGRESS(10, "索引中"),
    SUCCESS(20, "可用"),
    FAIL(30, "失败");

    /**
     * Status codes of every constant, in declaration order.
     */
    public static final int[] ARRAYS = Arrays.stream(values())
            .mapToInt(item -> item.status)
            .toArray();

    /**
     * Numeric status code
     */
    private final Integer status;
    /**
     * Display name of the status
     */
    private final String name;

    /**
     * Returns the shared {@link #ARRAYS} instance holding all status codes.
     */
    @Override
    public int[] array() {
        return ARRAYS;
    }

}

View File

@ -14,11 +14,11 @@ import java.util.List;
@Data @Data
public class AiKnowledgeCreateMyReqVO { public class AiKnowledgeCreateMyReqVO {
@Schema(description = "知识库名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "") @Schema(description = "知识库名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "ruoyi-vue-pro 用户指南")
@NotBlank(message = "知识库名称不能为空") @NotBlank(message = "知识库名称不能为空")
private String name; private String name;
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "") @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档")
private String description; private String description;
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]") @Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")

View File

@ -0,0 +1,27 @@
package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
/**
 * Request VO of the admin API for creating a document in an AI knowledge base.
 * <p>
 * All three fields are declared as required in the API schema and are validated accordingly.
 *
 * @author xiaoxin
 */
@Schema(description = "管理后台 - AI 知识库【创建文档】 Request VO")
@Data
public class AiKnowledgeDocumentCreateReqVO {

    /**
     * Id of the knowledge base the document is created in; must not be null.
     */
    @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
    @NotNull(message = "知识库编号不能为空")
    private Long knowledgeId;

    /**
     * Name of the document; must not be blank.
     */
    @Schema(description = "文档名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "三方登陆")
    @NotBlank(message = "文档名称不能为空")
    private String name;

    /**
     * URL of the document; must not be blank.
     * The schema already marks it as required, so the constraint makes validation match the documented contract.
     */
    @Schema(description = "文档 url", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
    @NotBlank(message = "文档 url 不能为空")
    private String url;

}

View File

@ -1,10 +1,13 @@
package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.IdType; import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
import lombok.Data; import lombok.Data;
import java.util.List; import java.util.List;
@ -40,7 +43,8 @@ public class AiKnowledgeBaseDO extends BaseDO {
/** /**
* 可见权限,只能选择哪些人可见 * 可见权限,只能选择哪些人可见
*/ */
private List<String> visibilityPermissions; @TableField(typeHandler = JacksonTypeHandler.class)
private List<Long> visibilityPermissions;
/** /**
* 嵌入模型编号高质量模式时维护 * 嵌入模型编号高质量模式时维护
*/ */
@ -50,7 +54,9 @@ public class AiKnowledgeBaseDO extends BaseDO {
*/ */
private String model; private String model;
/** /**
* 是否启用 * 状态
* <p>
* 枚举 {@link CommonStatusEnum}
*/ */
private Boolean status; private Integer status;
} }

View File

@ -1,6 +1,8 @@
package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
import com.baomidou.mybatisplus.annotation.IdType; import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
@ -46,10 +48,15 @@ public class AiKnowledgeDocumentDO extends BaseDO {
private Integer wordCount; private Integer wordCount;
/** /**
* 切片状态 * 切片状态
* <p>
* 枚举 {@link AiKnowledgeDocumentStatusEnum}
*/ */
private Integer sliceStatus; private Integer sliceStatus;
/** /**
* 是否启用 * 状态
* <p>
* 枚举 {@link CommonStatusEnum}
*/ */
private Boolean status; private Integer status;
} }

View File

@ -1,5 +1,6 @@
package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.IdType; import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
@ -41,8 +42,10 @@ public class AiKnowledgeSegmentDO extends BaseDO {
*/ */
private Integer tokens; private Integer tokens;
/** /**
* 是否启用 * 状态
* <p>
* 枚举 {@link CommonStatusEnum}
*/ */
private Boolean status; private Integer status;
} }

View File

@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO;
import org.apache.ibatis.annotations.Mapper;
/** /**
* AI 知识库基础信息 Mapper * AI 知识库基础信息 Mapper
* *
* @author xiaoxin * @author xiaoxin
*/ */
@Mapper
public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> { public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> {
} }

View File

@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
import org.apache.ibatis.annotations.Mapper;
/** /**
* AI 知识库-文档 Mapper * AI 知识库-文档 Mapper
* *
* @author xiaoxin * @author xiaoxin
*/ */
@Mapper
public interface AiKnowledgeDocumentMapper extends BaseMapperX<AiKnowledgeDocumentDO> { public interface AiKnowledgeDocumentMapper extends BaseMapperX<AiKnowledgeDocumentDO> {
} }

View File

@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
import org.apache.ibatis.annotations.Mapper;
/** /**
* AI 知识库-分片 Mapper * AI 知识库-分片 Mapper
* *
* @author xiaoxin * @author xiaoxin
*/ */
@Mapper
public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegmentDO> { public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegmentDO> {
} }

View File

@ -1,6 +1,7 @@
package cn.iocoder.yudao.module.ai.service.knowledge; package cn.iocoder.yudao.module.ai.service.knowledge;
import org.springframework.ai.document.Document; import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import java.util.List; import java.util.List;
@ -12,9 +13,9 @@ import java.util.List;
public interface AiEmbeddingService { public interface AiEmbeddingService {
/** /**
* 向量化文档 * 向量化文档并存储
*/ */
void embeddingDoc(); void add(List<Document> documents);
/** /**
@ -22,5 +23,5 @@ public interface AiEmbeddingService {
* *
* @param content 查询内容 * @param content 查询内容
*/ */
List<Document> similaritySearch(String content); List<Document> similaritySearch(SearchRequest request);
} }

View File

@ -2,11 +2,9 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
import jakarta.annotation.Resource; import jakarta.annotation.Resource;
import org.springframework.ai.document.Document; import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.RedisVectorStore; import org.springframework.ai.vectorstore.RedisVectorStore;
import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List; import java.util.List;
@ -21,27 +19,14 @@ public class AiEmbeddingServiceImpl implements AiEmbeddingService {
@Resource @Resource
private RedisVectorStore vectorStore; private RedisVectorStore vectorStore;
@Resource
private TokenTextSplitter tokenTextSplitter;
// TODO @xin 临时测试用后续删
@Value("classpath:/webapp/test/Fel.pdf")
private org.springframework.core.io.Resource data;
@Override @Override
public void embeddingDoc() { public void add(List<Document> documents) {
// 读取文件 vectorStore.add(documents);
TikaDocumentReader loader = new TikaDocumentReader(data);
List<Document> documents = loader.get();
// 文档分段
List<Document> segments = tokenTextSplitter.apply(documents);
// 向量化并存储
vectorStore.add(segments);
} }
@Override @Override
public List<Document> similaritySearch(String content) { public List<Document> similaritySearch(SearchRequest request) {
SearchRequest request = SearchRequest.query(content);
return vectorStore.similaritySearch(request); return vectorStore.similaritySearch(request);
} }
} }

View File

@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
import cn.hutool.core.lang.Assert; import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.ObjUtil; import cn.hutool.core.util.ObjUtil;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
@ -25,17 +26,19 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
@Slf4j @Slf4j
public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService { public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
@Resource
private AiKnowledgeBaseMapper knowledgeBaseMapper;
@Resource @Resource
private AiChatModelService chatModalService; private AiChatModelService chatModalService;
@Resource
private AiKnowledgeBaseMapper knowledgeBaseMapper;
@Override @Override
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) { public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
AiChatModelDO model = validateChatModel(createReqVO.getModelId()); AiChatModelDO model = validateChatModel(createReqVO.getModelId());
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class); AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class);
knowledgeBaseDO.setModel(model.getModel()).setUserId(userId); knowledgeBaseDO.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
knowledgeBaseMapper.insert(knowledgeBaseDO); knowledgeBaseMapper.insert(knowledgeBaseDO);
return knowledgeBaseDO.getId(); return knowledgeBaseDO.getId();

View File

@ -1,5 +1,7 @@
package cn.iocoder.yudao.module.ai.service.knowledge; package cn.iocoder.yudao.module.ai.service.knowledge;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO;
/** /**
* AI 知识库-文档 Service 接口 * AI 知识库-文档 Service 接口
* *
@ -7,4 +9,13 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
*/ */
public interface AiKnowledgeDocumentService { public interface AiKnowledgeDocumentService {
/**
 * Creates a knowledge-base document.
 *
 * @param createReqVO request VO describing the document to create
 * @return id of the created document
 */
Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO);
} }

View File

@ -1,7 +1,25 @@
package cn.iocoder.yudao.module.ai.service.knowledge; package cn.iocoder.yudao.module.ai.service.knowledge;
import cn.hutool.core.collection.CollUtil;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper;
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
/** /**
* AI 知识库-文档 Service 实现类 * AI 知识库-文档 Service 实现类
@ -12,5 +30,55 @@ import org.springframework.stereotype.Service;
@Slf4j @Slf4j
public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentService { public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentService {
// Mapper used to insert the document row
@Resource
private AiKnowledgeDocumentMapper documentMapper;
// Mapper used to batch-insert the segment rows of a document
@Resource
private AiKnowledgeSegmentMapper segmentMapper;
// Splitter applied to the loaded documents to produce segments
@Resource
private TokenTextSplitter tokenTextSplitter;
// Service the segments are handed to for vectorization and storage
@Resource
private AiEmbeddingService embeddingService;
// TODO @xin: temporary, for testing only; remove later.
// Classpath PDF that createKnowledgeDocument currently reads as the document content.
@Value("classpath:/webapp/test/Fel.pdf")
private org.springframework.core.io.Resource data;
/**
 * Inserts the document row, loads the content, splits it into segments, stores the
 * segments in the database and passes them to the embedding service.
 * <p>
 * NOTE: the content is currently read from the injected test resource {@code data},
 * not from the request.
 *
 * @param createReqVO request VO describing the document to create
 * @return id of the inserted document row
 */
@Override
@Transactional(rollbackFor = Exception.class)
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
    // Insert the document row first; its id is read afterwards and attached to every segment
    AiKnowledgeDocumentDO document = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class);
    // TODO: tokens and word count are placeholder values
    document.setTokens(0);
    document.setWordCount(0);
    document.setStatus(CommonStatusEnum.ENABLE.getStatus());
    document.setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
    documentMapper.insert(document);
    final Long documentId = document.getId();

    // Load the raw content through Tika
    List<Document> loaded = new TikaDocumentReader(data).get();
    if (CollUtil.isEmpty(loaded)) {
        log.info("文档内容为空");
        return documentId;
    }

    // Split into segments and persist one row per segment
    List<Document> segments = tokenTextSplitter.apply(loaded);
    List<AiKnowledgeSegmentDO> segmentRows = CollectionUtils.convertList(segments,
            segment -> toSegmentDO(segment, documentId));
    segmentMapper.insertBatch(segmentRows);

    // Vectorize and store the segments
    embeddingService.add(segments);
    return documentId;
}

/**
 * Builds the segment row for one split segment of the given document.
 */
private AiKnowledgeSegmentDO toSegmentDO(Document segment, Long documentId) {
    AiKnowledgeSegmentDO row = new AiKnowledgeSegmentDO();
    row.setContent(segment.getContent());
    row.setDocumentId(documentId);
    // TODO: tokens and word count are placeholder values
    row.setTokens(0);
    row.setWordCount(0);
    row.setStatus(CommonStatusEnum.ENABLE.getStatus());
    return row;
}
} }