【代码评审】AI 大模型:知识库的逻辑

This commit is contained in:
YunaiV 2024-08-19 19:24:36 +08:00
parent 0db165f6d9
commit 6cceab5ba4
18 changed files with 53 additions and 52 deletions

View File

@ -7,9 +7,6 @@ import lombok.Data;
import java.util.List; import java.util.List;
/**
* @author xiaoxin
*/
@Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO") @Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO")
@Data @Data
public class AiKnowledgeCreateMyReqVO { public class AiKnowledgeCreateMyReqVO {
@ -21,10 +18,10 @@ public class AiKnowledgeCreateMyReqVO {
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档") @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档")
private String description; private String description;
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]") @Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
private List<Long> visibilityPermissions; private List<Long> visibilityPermissions;
@Schema(description = "嵌入模型 id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1") @Schema(description = "嵌入模型编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@NotNull(message = "嵌入模型不能为空") @NotNull(message = "嵌入模型不能为空")
private Long modelId; private Long modelId;

View File

@ -4,6 +4,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.NotNull;
import lombok.Data; import lombok.Data;
import org.hibernate.validator.constraints.URL;
/** /**
* @author xiaoxin * @author xiaoxin
@ -12,7 +13,6 @@ import lombok.Data;
@Data @Data
public class AiKnowledgeDocumentCreateReqVO { public class AiKnowledgeDocumentCreateReqVO {
@Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204") @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
@NotNull(message = "知识库编号不能为空") @NotNull(message = "知识库编号不能为空")
private Long knowledgeId; private Long knowledgeId;
@ -21,7 +21,8 @@ public class AiKnowledgeDocumentCreateReqVO {
@NotBlank(message = "文档名称不能为空") @NotBlank(message = "文档名称不能为空")
private String name; private String name;
@Schema(description = "文档 url", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn") @Schema(description = "文档 URL", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
@URL(message = "文档 URL 格式不正确")
private String url; private String url;
} }

View File

@ -7,14 +7,10 @@ import lombok.Data;
import java.util.List; import java.util.List;
/**
* @author xiaoxin
*/
@Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO") @Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO")
@Data @Data
public class AiKnowledgeUpdateMyReqVO { public class AiKnowledgeUpdateMyReqVO {
@Schema(description = "对话编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204") @Schema(description = "对话编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
@NotNull(message = "知识库编号不能为空") @NotNull(message = "知识库编号不能为空")
private Long id; private Long id;
@ -26,10 +22,10 @@ public class AiKnowledgeUpdateMyReqVO {
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "") @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "")
private String description; private String description;
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]") @Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
private List<Long> visibilityPermissions; private List<Long> visibilityPermissions;
@Schema(description = "嵌入模型 id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1") @Schema(description = "嵌入模型编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@NotNull(message = "嵌入模型不能为空") @NotNull(message = "嵌入模型不能为空")
private Long modelId; private Long modelId;

View File

@ -32,7 +32,7 @@ public class AiImageDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**

View File

@ -1,9 +1,7 @@
package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField; import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
@ -12,19 +10,20 @@ import lombok.Data;
import java.util.List; import java.util.List;
// TODO @xin:要不把 AiKnowledgeBaseDO 改成 AiKnowledgeDO,base 后缀感觉有点奇怪,让人以为是基类;然后,我们很多地方的外键编号都是 knowledgeId
/** /**
* AI 知识库 DO * AI 知识库 DO
* *
* @author xiaoxin * @author xiaoxin
*/ */
@TableName(value = "ai_knowledge_base") @TableName(value = "ai_knowledge_base", autoResultMap = true)
@Data @Data
public class AiKnowledgeBaseDO extends BaseDO { public class AiKnowledgeBaseDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**
* 用户编号 * 用户编号
@ -40,6 +39,7 @@ public class AiKnowledgeBaseDO extends BaseDO {
* 知识库描述 * 知识库描述
*/ */
private String description; private String description;
// TODO @xin:如果全部可见,需要怎么设置?
/** /**
* 可见权限,只能选择哪些人可见 * 可见权限,只能选择哪些人可见
*/ */

View File

@ -3,7 +3,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum; import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data; import lombok.Data;
@ -20,10 +19,12 @@ public class AiKnowledgeDocumentDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**
* 知识库编号 * 知识库编号
*
* 关联 {@link AiKnowledgeBaseDO#getId()}
*/ */
private Long knowledgeId; private Long knowledgeId;
/** /**
@ -39,7 +40,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
*/ */
private String url; private String url;
/** /**
* token数量 * token 数量
*/ */
private Integer tokens; private Integer tokens;
/** /**
@ -59,4 +60,5 @@ public class AiKnowledgeDocumentDO extends BaseDO {
* 枚举 {@link CommonStatusEnum} * 枚举 {@link CommonStatusEnum}
*/ */
private Integer status; private Integer status;
} }

View File

@ -2,7 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data; import lombok.Data;
@ -19,14 +18,17 @@ public class AiKnowledgeSegmentDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**
* 向量库的id * 向量库的编号
*/ */
private String vectorId; private String vectorId;
// TODO @xin:knowledgeId 加个,会方便点
/** /**
* 文档编号 * 文档编号
*
* 关联 {@link AiKnowledgeDocumentDO#getId()}
*/ */
private Long documentId; private Long documentId;
/** /**
@ -38,7 +40,7 @@ public class AiKnowledgeSegmentDO extends BaseDO {
*/ */
private Integer wordCount; private Integer wordCount;
/** /**
* token数量 * token 数量
*/ */
private Integer tokens; private Integer tokens;
/** /**

View File

@ -19,7 +19,7 @@ public class AiMindMapDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**

View File

@ -25,7 +25,7 @@ public class AiMusicDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**

View File

@ -20,7 +20,7 @@ public class AiWriteDO extends BaseDO {
/** /**
* 编号 * 编号
*/ */
@TableId(type = IdType.AUTO) @TableId
private Long id; private Long id;
/** /**

View File

@ -17,11 +17,11 @@ public interface AiEmbeddingService {
*/ */
void add(List<Document> documents); void add(List<Document> documents);
/** /**
* 相似查询 * 相似查询
* *
* @param request 查询实体 * @param request 查询实体
*/ */
List<Document> similaritySearch(SearchRequest request); List<Document> similaritySearch(SearchRequest request);
} }

View File

@ -8,6 +8,7 @@ import org.springframework.stereotype.Service;
import java.util.List; import java.util.List;
// TODO @xin:是不是不用 AiEmbeddingServiceImpl,直接 vectorStore 注入到需要的地方就好啦;通过 KnowledgeDocumentService 返回就好
/** /**
* AI 嵌入 Service 实现类 * AI 嵌入 Service 实现类
* *
@ -30,4 +31,5 @@ public class AiEmbeddingServiceImpl implements AiEmbeddingService {
public List<Document> similaritySearch(SearchRequest request) { public List<Document> similaritySearch(SearchRequest request) {
return vectorStore.similaritySearch(request); return vectorStore.similaritySearch(request);
} }
} }

View File

@ -9,7 +9,6 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdat
*/ */
public interface AiKnowledgeBaseService { public interface AiKnowledgeBaseService {
/** /**
* 创建我的知识库 * 创建我的知识库
* *
@ -27,4 +26,5 @@ public interface AiKnowledgeBaseService {
* @param userId 用户编号 * @param userId 用户编号
*/ */
void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId); void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId);
} }

View File

@ -32,33 +32,36 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
@Resource @Resource
private AiKnowledgeBaseMapper knowledgeBaseMapper; private AiKnowledgeBaseMapper knowledgeBaseMapper;
@Override @Override
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) { public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
// TODO @xin:貌似直接调用 chatModalService.validateChatModel(id) 完事,不用搞个方法
// 1. 校验模型配置
AiChatModelDO model = validateChatModel(createReqVO.getModelId()); AiChatModelDO model = validateChatModel(createReqVO.getModelId());
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class); // 2. 插入知识库
knowledgeBaseDO.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus()); // TODO @xin:不用 DO 结尾
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class)
.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
knowledgeBaseMapper.insert(knowledgeBaseDO); knowledgeBaseMapper.insert(knowledgeBaseDO);
return knowledgeBaseDO.getId(); return knowledgeBaseDO.getId();
} }
@Override @Override
public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) { public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) {
// 1.1 校验知识库存在
AiKnowledgeBaseDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId()); AiKnowledgeBaseDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) { if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) {
throw exception(KNOWLEDGE_NOT_EXISTS); throw exception(KNOWLEDGE_NOT_EXISTS);
} }
// 1.2 校验模型配置
AiChatModelDO model = validateChatModel(updateReqVO.getModelId()); AiChatModelDO model = validateChatModel(updateReqVO.getModelId());
// 2. 更新知识库
AiKnowledgeBaseDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class); AiKnowledgeBaseDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class);
updateDO.setModel(model.getModel()); updateDO.setModel(model.getModel());
knowledgeBaseMapper.updateById(updateDO); knowledgeBaseMapper.updateById(updateDO);
} }
private AiChatModelDO validateChatModel(Long id) { private AiChatModelDO validateChatModel(Long id) {
AiChatModelDO model = chatModalService.validateChatModel(id); AiChatModelDO model = chatModalService.validateChatModel(id);
Assert.notNull(model, "未找到对应嵌入模型"); Assert.notNull(model, "未找到对应嵌入模型");
@ -72,4 +75,5 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
} }
return knowledgeBase; return knowledgeBase;
} }
} }

View File

@ -9,7 +9,6 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocum
*/ */
public interface AiKnowledgeDocumentService { public interface AiKnowledgeDocumentService {
/** /**
* 创建文档 * 创建文档
* *

View File

@ -43,28 +43,30 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
@Resource @Resource
private AiEmbeddingService embeddingService; private AiEmbeddingService embeddingService;
// TODO @xin:@Resource 注入
private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator(); private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator();
// TODO xiaoxin 临时测试用,后续删 // TODO xiaoxin 临时测试用,后续删
@Value("classpath:/webapp/test/Fel.pdf") @Value("classpath:/webapp/test/Fel.pdf")
private org.springframework.core.io.Resource data; private org.springframework.core.io.Resource data;
// TODO 芋艿:需要 review 代码格式
// TODO @xin:最好有 1/2/3 这种,让代码更有层次感
@Override @Override
@Transactional(rollbackFor = Exception.class) @Transactional(rollbackFor = Exception.class)
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) { public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
// TODO xiaoxin 后续从 url 加载 // TODO xiaoxin 后续从 url 加载
TikaDocumentReader loader = new TikaDocumentReader(data); TikaDocumentReader loader = new TikaDocumentReader(data);
// 加载文档 // 加载文档
List<Document> documents = loader.get(); List<Document> documents = loader.get();
Document document = CollUtil.getFirst(documents); Document document = CollUtil.getFirst(documents);
// TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出? // TODO @xin:是不是不存在,就抛出异常呀;或者 return
// TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出?回复:先直接算
Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0; Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0; Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class); AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
documentDO.setTokens(tokens).setWordCount(wordCount) .setTokens(tokens).setWordCount(wordCount)
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus()); .setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
// 文档记录入库 // 文档记录入库
documentMapper.insert(documentDO); documentMapper.insert(documentDO);
@ -75,17 +77,15 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
// 文档分段 // 文档分段
List<Document> segments = tokenTextSplitter.apply(documents); List<Document> segments = tokenTextSplitter.apply(documents);
// 分段内容入库
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments, List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId) segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
.setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length()) .setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
.setStatus(CommonStatusEnum.ENABLE.getStatus())); .setStatus(CommonStatusEnum.ENABLE.getStatus()));
// 分段内容入库
segmentMapper.insertBatch(segmentDOList); segmentMapper.insertBatch(segmentDOList);
// 向量化并存储
//向量化并存储
embeddingService.add(segments); embeddingService.add(segments);
return documentId; return documentId;
} }
} }

View File

@ -1,11 +1,10 @@
package cn.iocoder.yudao.module.ai.service.knowledge; package cn.iocoder.yudao.module.ai.service.knowledge;
/** /**
* AI 知识库-分片 Service 接口 * AI 知识库分片 Service 接口
* *
* @author xiaoxin * @author xiaoxin
*/ */
public interface AiKnowledgeSegmentService { public interface AiKnowledgeSegmentService {
} }

View File

@ -4,7 +4,7 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
/** /**
* AI 知识库-基础信息 Service 实现类 * AI 知识库分片 Service 实现类
* *
* @author xiaoxin * @author xiaoxin
*/ */
@ -12,5 +12,4 @@ import org.springframework.stereotype.Service;
@Slf4j @Slf4j
public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService { public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService {
} }