【解决todo】AI 知识库

This commit is contained in:
xiaoxin 2024-08-28 16:01:10 +08:00
parent 6cceab5ba4
commit ed2296e4c7
10 changed files with 39 additions and 105 deletions

View File

@ -3,7 +3,7 @@ package cn.iocoder.yudao.module.ai.controller.admin.knowledge;
import cn.iocoder.yudao.framework.common.pojo.CommonResult; import cn.iocoder.yudao.framework.common.pojo.CommonResult;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeBaseService; import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeService;
import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag; import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.annotation.Resource; import jakarta.annotation.Resource;
@ -19,7 +19,7 @@ import static cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUti
public class AiKnowledgeController { public class AiKnowledgeController {
@Resource @Resource
private AiKnowledgeBaseService knowledgeBaseService; private AiKnowledgeService knowledgeBaseService;
@PostMapping("/create-my") @PostMapping("/create-my")
@Operation(summary = "创建【我的】知识库") @Operation(summary = "创建【我的】知识库")

View File

@ -10,15 +10,14 @@ import lombok.Data;
import java.util.List; import java.util.List;
// TODO @xin:要不,把 AiKnowledgeBaseDO 改成 AiKnowledgeDO。感觉 base 后缀有点奇怪,让人以为是基类;然后,我们很多地方的外键编号,都是 knowledgeId
/** /**
* AI 知识库 DO * AI 知识库 DO
* *
* @author xiaoxin * @author xiaoxin
*/ */
@TableName(value = "ai_knowledge_base", autoResultMap = true) @TableName(value = "ai_knowledge", autoResultMap = true)
@Data @Data
public class AiKnowledgeBaseDO extends BaseDO { public class AiKnowledgeDO extends BaseDO {
/** /**
* 编号 * 编号

View File

@ -24,7 +24,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
/** /**
* 知识库编号 * 知识库编号
* *
* 关联 {@link AiKnowledgeBaseDO#getId()} * 关联 {@link AiKnowledgeDO#getId()}
*/ */
private Long knowledgeId; private Long knowledgeId;
/** /**

View File

@ -1,7 +1,7 @@
package cn.iocoder.yudao.module.ai.dal.mysql.knowledge; package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
import org.apache.ibatis.annotations.Mapper; import org.apache.ibatis.annotations.Mapper;
/** /**
@ -10,5 +10,5 @@ import org.apache.ibatis.annotations.Mapper;
* @author xiaoxin * @author xiaoxin
*/ */
@Mapper @Mapper
public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> { public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeDO> {
} }

View File

@ -1,27 +0,0 @@
package cn.iocoder.yudao.module.ai.service.knowledge;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import java.util.List;
/**
* AI embedding service interface.
*
* Declares the operations for storing documents in vectorized form and for similarity search.
*
* @author xiaoxin
*/
public interface AiEmbeddingService {
/**
* Vectorizes the given documents and stores them.
*
* @param documents documents to vectorize and store
*/
void add(List<Document> documents);
/**
* Runs a similarity search.
*
* @param request search request
* @return documents returned by the search
*/
List<Document> similaritySearch(SearchRequest request);
}

View File

@ -1,35 +0,0 @@
package cn.iocoder.yudao.module.ai.service.knowledge;
import jakarta.annotation.Resource;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.RedisVectorStore;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.stereotype.Service;
import java.util.List;
// TODO @xin: is AiEmbeddingServiceImpl needed at all? Injecting vectorStore where it is required and returning results through KnowledgeDocumentService should be enough.
/**
 * AI embedding service implementation; both operations delegate to the injected vector store.
 *
 * @author xiaoxin
 */
@Service
public class AiEmbeddingServiceImpl implements AiEmbeddingService {

    @Resource
    private RedisVectorStore vectorStore;

    /**
     * Hands the documents to the vector store's {@code add}.
     *
     * @param documents documents to store
     */
    @Override
    // @Async
    // TODO xiaoxin: @Async raised an error, so it is commented out for now
    public void add(List<Document> documents) {
        vectorStore.add(documents);
    }

    /**
     * Forwards the request to the vector store's {@code similaritySearch}.
     *
     * @param request search request
     * @return whatever the vector store returns for the request
     */
    @Override
    public List<Document> similaritySearch(SearchRequest request) {
        List<Document> matches = vectorStore.similaritySearch(request);
        return matches;
    }
}

View File

@ -14,8 +14,9 @@ import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document; import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader; import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator; import org.springframework.ai.tokenizer.TokenCountEstimator;
import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.RedisVectorStore;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
@ -39,52 +40,49 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
@Resource @Resource
private TokenTextSplitter tokenTextSplitter; private TokenTextSplitter tokenTextSplitter;
@Resource @Resource
private AiEmbeddingService embeddingService; private TokenCountEstimator TOKEN_COUNT_ESTIMATOR;
@Resource
private RedisVectorStore vectorStore;
// TODO @xin:@Resource 注入
private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator();
// TODO xiaoxin 临时测试用后续删 // TODO xiaoxin 临时测试用后续删
@Value("classpath:/webapp/test/Fel.pdf") @Value("classpath:/webapp/test/Fel.pdf")
private org.springframework.core.io.Resource data; private org.springframework.core.io.Resource data;
// TODO 芋艿需要 review 代码格式 // TODO 芋艿需要 review 代码格式
// TODO @xin:最好有 1/2/3 这种,让代码更有层次感
@Override @Override
@Transactional(rollbackFor = Exception.class) @Transactional(rollbackFor = Exception.class)
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) { public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
// TODO xiaoxin 后续从 url 加载 // TODO xiaoxin 后续从 url 加载
TikaDocumentReader loader = new TikaDocumentReader(data); TikaDocumentReader loader = new TikaDocumentReader(data);
// 加载文档 // 1.1 加载文档
List<Document> documents = loader.get(); List<Document> documents = loader.get();
Document document = CollUtil.getFirst(documents); Document document = CollUtil.getFirst(documents);
// TODO @xin是不是不存在就抛出异常呀厚泽 return // TODO @xin是不是不存在就抛出异常呀厚泽 return
// TODO 芋艿:文档层面有没有可能会比较大?这两个字段,是否可以从分段表计算得出?回复:先直接算
Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0; Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0; Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class) AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
.setTokens(tokens).setWordCount(wordCount) .setTokens(tokens).setWordCount(wordCount)
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus()); .setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
// 文档记录入库 // 1.2 文档记录入库
documentMapper.insert(documentDO); documentMapper.insert(documentDO);
Long documentId = documentDO.getId(); Long documentId = documentDO.getId();
if (CollUtil.isEmpty(documents)) { if (CollUtil.isEmpty(documents)) {
return documentId; return documentId;
} }
// 文档分段 // 2.1 文档分段
List<Document> segments = tokenTextSplitter.apply(documents); List<Document> segments = tokenTextSplitter.apply(documents);
// 分段内容入库 // 2.2 分段内容入库
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments, List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId) segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
.setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length()) .setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
.setStatus(CommonStatusEnum.ENABLE.getStatus())); .setStatus(CommonStatusEnum.ENABLE.getStatus()));
segmentMapper.insertBatch(segmentDOList); segmentMapper.insertBatch(segmentDOList);
// 向量化并存储 // 3 向量化并存储
embeddingService.add(segments); vectorStore.add(segments);
return documentId; return documentId;
} }

View File

@ -7,7 +7,7 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdat
* *
* @author xiaoxin * @author xiaoxin
*/ */
public interface AiKnowledgeBaseService { public interface AiKnowledgeService {
/** /**
* 创建我的知识库 * 创建我的知识库

View File

@ -1,12 +1,11 @@
package cn.iocoder.yudao.module.ai.service.knowledge; package cn.iocoder.yudao.module.ai.service.knowledge;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.ObjUtil; import cn.hutool.core.util.ObjUtil;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO; import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO; import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO; import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeBaseMapper; import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeBaseMapper;
import cn.iocoder.yudao.module.ai.service.model.AiChatModelService; import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
@ -24,7 +23,7 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
*/ */
@Service @Service
@Slf4j @Slf4j
public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService { public class AiKnowledgeServiceImpl implements AiKnowledgeService {
@Resource @Resource
private AiChatModelService chatModalService; private AiChatModelService chatModalService;
@ -34,42 +33,34 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
@Override @Override
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) { public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
// TODO @xin:貌似直接调用 chatModalService.validateChatModel(id) 完事,不用搞个方法
// 1. 校验模型配置 // 1. 校验模型配置
AiChatModelDO model = validateChatModel(createReqVO.getModelId()); AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
// 2. 插入知识库 // 2. 插入知识库
// TODO @xin不用 DO 结尾 AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class)
.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus()); .setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
knowledgeBaseMapper.insert(knowledgeBaseDO); knowledgeBaseMapper.insert(knowledgeBase);
return knowledgeBaseDO.getId(); return knowledgeBase.getId();
} }
@Override @Override
public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) { public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) {
// 1.1 校验知识库存在 // 1.1 校验知识库存在
AiKnowledgeBaseDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId()); AiKnowledgeDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) { if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) {
throw exception(KNOWLEDGE_NOT_EXISTS); throw exception(KNOWLEDGE_NOT_EXISTS);
} }
// 1.2 校验模型配置 // 1.2 校验模型配置
AiChatModelDO model = validateChatModel(updateReqVO.getModelId()); AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
// 2. 更新知识库 // 2. 更新知识库
AiKnowledgeBaseDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class); AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
updateDO.setModel(model.getModel()); updateDO.setModel(model.getModel());
knowledgeBaseMapper.updateById(updateDO); knowledgeBaseMapper.updateById(updateDO);
} }
private AiChatModelDO validateChatModel(Long id) { public AiKnowledgeDO validateKnowledgeExists(Long id) {
AiChatModelDO model = chatModalService.validateChatModel(id); AiKnowledgeDO knowledgeBase = knowledgeBaseMapper.selectById(id);
Assert.notNull(model, "未找到对应嵌入模型");
return model;
}
public AiKnowledgeBaseDO validateKnowledgeExists(Long id) {
AiKnowledgeBaseDO knowledgeBase = knowledgeBaseMapper.selectById(id);
if (knowledgeBase == null) { if (knowledgeBase == null) {
throw exception(KNOWLEDGE_NOT_EXISTS); throw exception(KNOWLEDGE_NOT_EXISTS);
} }

View File

@ -13,6 +13,8 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.autoconfigure.vectorstore.redis.RedisVectorStoreProperties; import org.springframework.ai.autoconfigure.vectorstore.redis.RedisVectorStoreProperties;
import org.springframework.ai.document.MetadataMode; import org.springframework.ai.document.MetadataMode;
import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.ai.tokenizer.TokenCountEstimator;
import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.transformers.TransformersEmbeddingModel; import org.springframework.ai.transformers.TransformersEmbeddingModel;
import org.springframework.ai.vectorstore.RedisVectorStore; import org.springframework.ai.vectorstore.RedisVectorStore;
@ -90,7 +92,7 @@ public class YudaoAiAutoConfiguration {
} }
/** /**
* 我们启动有加载很多 Embedding 模型不晓得取哪个好 new TransformersEmbeddingModel * TODO @xin 抽离出去根据具体模型走
*/ */
@Bean @Bean
@Lazy // TODO 芋艿临时注释避免无法启动 @Lazy // TODO 芋艿临时注释避免无法启动
@ -114,4 +116,10 @@ public class YudaoAiAutoConfiguration {
return new TokenTextSplitter(500, 100, 5, 10000, true); return new TokenTextSplitter(500, 100, 5, 10000, true);
} }
/**
 * Registers the {@link TokenCountEstimator} bean, backed by a new {@link JTokkitTokenCountEstimator}.
 *
 * @return the estimator instance exposed as a bean
 */
@Bean
@Lazy // TODO 芋艿: temporary, to avoid failing at startup
public TokenCountEstimator tokenCountEstimator() {
    TokenCountEstimator estimator = new JTokkitTokenCountEstimator();
    return estimator;
}
} }