【代码评审】AI 大模型:知识库的逻辑

This commit is contained in:
YunaiV 2024-08-19 19:24:36 +08:00
parent 0db165f6d9
commit 6cceab5ba4
18 changed files with 53 additions and 52 deletions

View File

@ -7,9 +7,6 @@ import lombok.Data;
import java.util.List;
/**
* @author xiaoxin
*/
@Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO")
@Data
public class AiKnowledgeCreateMyReqVO {
@ -21,10 +18,10 @@ public class AiKnowledgeCreateMyReqVO {
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档")
private String description;
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
@Schema(description = "可见权限只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
private List<Long> visibilityPermissions;
@Schema(description = "嵌入模型 id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@Schema(description = "嵌入模型编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@NotNull(message = "嵌入模型不能为空")
private Long modelId;

View File

@ -4,6 +4,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import org.hibernate.validator.constraints.URL;
/**
* @author xiaoxin
@ -12,7 +13,6 @@ import lombok.Data;
@Data
public class AiKnowledgeDocumentCreateReqVO {
@Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
@NotNull(message = "知识库编号不能为空")
private Long knowledgeId;
@ -21,7 +21,8 @@ public class AiKnowledgeDocumentCreateReqVO {
@NotBlank(message = "文档名称不能为空")
private String name;
@Schema(description = "文档 url", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
@Schema(description = "文档 URL", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
@URL(message = "文档 URL 格式不正确")
private String url;
}

View File

@ -7,14 +7,10 @@ import lombok.Data;
import java.util.List;
/**
* @author xiaoxin
*/
@Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO")
@Data
public class AiKnowledgeUpdateMyReqVO {
/**
 * Id of the knowledge base to update; must not be null.
 */
@Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
@NotNull(message = "知识库编号不能为空")
private Long id;
@ -26,10 +22,10 @@ public class AiKnowledgeUpdateMyReqVO {
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "")
private String description;
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
@Schema(description = "可见权限只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
private List<Long> visibilityPermissions;
@Schema(description = "嵌入模型 id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@Schema(description = "嵌入模型编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@NotNull(message = "嵌入模型不能为空")
private Long modelId;

View File

@ -32,7 +32,7 @@ public class AiImageDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**

View File

@ -1,9 +1,7 @@
package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
@ -12,19 +10,20 @@ import lombok.Data;
import java.util.List;
// TODO @xin:要不把 AiKnowledgeBaseDO 改成 AiKnowledgeDO?base 后缀有点奇怪,让人以为是基类;另外,我们很多地方的外键编号都是 knowledgeId
/**
* AI 知识库 DO
*
* @author xiaoxin
*/
@TableName(value = "ai_knowledge_base")
@TableName(value = "ai_knowledge_base", autoResultMap = true)
@Data
public class AiKnowledgeBaseDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**
* 用户编号
@ -40,6 +39,7 @@ public class AiKnowledgeBaseDO extends BaseDO {
* 知识库描述
*/
private String description;
// TODO @xin:如果全部可见,需要怎么设置?
/**
* 可见权限,只能选择哪些人可见
*/

View File

@ -3,7 +3,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
@ -20,10 +19,12 @@ public class AiKnowledgeDocumentDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**
* 知识库编号
*
* 关联 {@link AiKnowledgeBaseDO#getId()}
*/
private Long knowledgeId;
/**
@ -39,7 +40,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
*/
private String url;
/**
* token数量
* token 数量
*/
private Integer tokens;
/**
@ -59,4 +60,5 @@ public class AiKnowledgeDocumentDO extends BaseDO {
* 枚举 {@link CommonStatusEnum}
*/
private Integer status;
}

View File

@ -2,7 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
@ -19,14 +18,17 @@ public class AiKnowledgeSegmentDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**
* 向量库的id
* 向量库的编号
*/
private String vectorId;
// TODO @xin:加个 knowledgeId 字段,会方便点
/**
* 文档编号
*
* 关联 {@link AiKnowledgeDocumentDO#getId()}
*/
private Long documentId;
/**
@ -38,7 +40,7 @@ public class AiKnowledgeSegmentDO extends BaseDO {
*/
private Integer wordCount;
/**
* token数量
* token 数量
*/
private Integer tokens;
/**

View File

@ -19,7 +19,7 @@ public class AiMindMapDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**

View File

@ -25,7 +25,7 @@ public class AiMusicDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**

View File

@ -20,7 +20,7 @@ public class AiWriteDO extends BaseDO {
/**
* 编号
*/
@TableId(type = IdType.AUTO)
@TableId
private Long id;
/**

View File

@ -17,11 +17,11 @@ public interface AiEmbeddingService {
*/
void add(List<Document> documents);
/**
* Runs a similarity search.
*
* @param request the search request describing the query
* @return the documents returned for the request
*/
List<Document> similaritySearch(SearchRequest request);
}

View File

@ -8,6 +8,7 @@ import org.springframework.stereotype.Service;
import java.util.List;
// TODO @xin:是不是不用 AiEmbeddingServiceImpl,直接把 vectorStore 注入到需要的地方就好啦?通过 KnowledgeDocumentService 返回就好
/**
* AI 嵌入 Service 实现类
*
@ -30,4 +31,5 @@ public class AiEmbeddingServiceImpl implements AiEmbeddingService {
public List<Document> similaritySearch(SearchRequest request) {
    // Pure delegation: the injected vector store performs the search.
    List<Document> matches = vectorStore.similaritySearch(request);
    return matches;
}
}

View File

@ -9,7 +9,6 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdat
*/
public interface AiKnowledgeBaseService {
/**
* 创建我的知识库
*
@ -27,4 +26,5 @@ public interface AiKnowledgeBaseService {
* @param userId 用户编号
*/
void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId);
}

View File

@ -32,33 +32,36 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
@Resource
private AiKnowledgeBaseMapper knowledgeBaseMapper;
@Override
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
// TODO @xin:貌似直接调用 chatModalService.validateChatModel(id) 就完事,不用搞个方法
// 1. 校验模型配置
AiChatModelDO model = validateChatModel(createReqVO.getModelId());
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class);
knowledgeBaseDO.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
// 2. 插入知识库
// TODO @xin:不用 DO 结尾
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class)
.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
knowledgeBaseMapper.insert(knowledgeBaseDO);
return knowledgeBaseDO.getId();
}
/**
 * Updates a knowledge base on behalf of the given user.
 *
 * <p>The update is refused with {@code KNOWLEDGE_NOT_EXISTS} when the stored record's
 * user id differs from {@code userId}.
 *
 * @param updateReqVO the update request; its id selects the record, its model id selects the model
 * @param userId      the user performing the update
 */
@Override
public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) {
    // 1.1 Load the record and make sure it belongs to the caller.
    AiKnowledgeBaseDO existing = validateKnowledgeExists(updateReqVO.getId());
    Long ownerId = existing.getUserId();
    if (ObjUtil.notEqual(ownerId, userId)) {
        throw exception(KNOWLEDGE_NOT_EXISTS);
    }

    // 1.2 Validate the model configuration referenced by the request.
    AiChatModelDO chatModel = validateChatModel(updateReqVO.getModelId());

    // 2. Copy the request onto a fresh DO, attach the validated model's value, and update by id.
    AiKnowledgeBaseDO toUpdate = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class)
            .setModel(chatModel.getModel());
    knowledgeBaseMapper.updateById(toUpdate);
}
private AiChatModelDO validateChatModel(Long id) {
AiChatModelDO model = chatModalService.validateChatModel(id);
Assert.notNull(model, "未找到对应嵌入模型");
@ -72,4 +75,5 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
}
return knowledgeBase;
}
}

View File

@ -9,7 +9,6 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocum
*/
public interface AiKnowledgeDocumentService {
/**
* 创建文档
*

View File

@ -43,28 +43,30 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
@Resource
private AiEmbeddingService embeddingService;
// TODO @xin:@Resource 注入
private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator();
// TODO xiaoxin 临时测试用,后续删
@Value("classpath:/webapp/test/Fel.pdf")
private org.springframework.core.io.Resource data;
// TODO 芋艿:需要 review 代码格式
// TODO @xin:最好有 1/2/3 这种,让代码更有层次感
@Override
@Transactional(rollbackFor = Exception.class)
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
// TODO xiaoxin 后续从 url 加载
TikaDocumentReader loader = new TikaDocumentReader(data);
// 加载文档
List<Document> documents = loader.get();
Document document = CollUtil.getFirst(documents);
// TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出?
// TODO @xin:是不是不存在就抛出异常呀,或者 return
// TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出?回复:先直接算
Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class);
documentDO.setTokens(tokens).setWordCount(wordCount)
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
.setTokens(tokens).setWordCount(wordCount)
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
// 文档记录入库
documentMapper.insert(documentDO);
@ -75,17 +77,15 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
// 文档分段
List<Document> segments = tokenTextSplitter.apply(documents);
// 分段内容入库
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
.setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
.setStatus(CommonStatusEnum.ENABLE.getStatus()));
// 分段内容入库
segmentMapper.insertBatch(segmentDOList);
//向量化并存储
// 向量化并存储
embeddingService.add(segments);
return documentId;
}
}

View File

@ -1,11 +1,10 @@
package cn.iocoder.yudao.module.ai.service.knowledge;
/**
* AI 知识库-分片 Service 接口
* AI 知识库分片 Service 接口
*
* @author xiaoxin
*/
public interface AiKnowledgeSegmentService {
// No operations are declared on this interface yet.
}

View File

@ -4,7 +4,7 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* AI 知识库-基础信息 Service 实现类
* AI 知识库分片 Service 实现类
*
* @author xiaoxin
*/
@ -12,5 +12,4 @@ import org.springframework.stereotype.Service;
@Slf4j
public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService {
// No members are defined in this class yet.
}