mirror of
https://gitee.com/huangge1199_admin/vue-pro.git
synced 2024-11-26 09:11:52 +08:00
【代码评审】AI 大模型:知识库的逻辑
This commit is contained in:
parent
0db165f6d9
commit
6cceab5ba4
@ -7,9 +7,6 @@ import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author xiaoxin
|
||||
*/
|
||||
@Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO")
|
||||
@Data
|
||||
public class AiKnowledgeCreateMyReqVO {
|
||||
@ -21,10 +18,10 @@ public class AiKnowledgeCreateMyReqVO {
|
||||
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档")
|
||||
private String description;
|
||||
|
||||
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
|
||||
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
|
||||
private List<Long> visibilityPermissions;
|
||||
|
||||
@Schema(description = "嵌入模型 id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
@Schema(description = "嵌入模型编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
@NotNull(message = "嵌入模型不能为空")
|
||||
private Long modelId;
|
||||
|
||||
|
@ -4,6 +4,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import lombok.Data;
|
||||
import org.hibernate.validator.constraints.URL;
|
||||
|
||||
/**
|
||||
* @author xiaoxin
|
||||
@ -12,7 +13,6 @@ import lombok.Data;
|
||||
@Data
|
||||
public class AiKnowledgeDocumentCreateReqVO {
|
||||
|
||||
|
||||
@Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
|
||||
@NotNull(message = "知识库编号不能为空")
|
||||
private Long knowledgeId;
|
||||
@ -21,7 +21,8 @@ public class AiKnowledgeDocumentCreateReqVO {
|
||||
@NotBlank(message = "文档名称不能为空")
|
||||
private String name;
|
||||
|
||||
@Schema(description = "文档 url", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
|
||||
@Schema(description = "文档 URL", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
|
||||
@URL(message = "文档 URL 格式不正确")
|
||||
private String url;
|
||||
|
||||
}
|
||||
|
@ -7,14 +7,10 @@ import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author xiaoxin
|
||||
*/
|
||||
@Schema(description = "管理后台 - AI 知识库创建【我的】 Request VO")
|
||||
@Data
|
||||
public class AiKnowledgeUpdateMyReqVO {
|
||||
|
||||
|
||||
@Schema(description = "对话编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
|
||||
@NotNull(message = "知识库编号不能为空")
|
||||
private Long id;
|
||||
@ -26,10 +22,10 @@ public class AiKnowledgeUpdateMyReqVO {
|
||||
@Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "")
|
||||
private String description;
|
||||
|
||||
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
|
||||
@Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")
|
||||
private List<Long> visibilityPermissions;
|
||||
|
||||
@Schema(description = "嵌入模型 id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
@Schema(description = "嵌入模型编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
@NotNull(message = "嵌入模型不能为空")
|
||||
private Long modelId;
|
||||
|
||||
|
@ -32,7 +32,7 @@ public class AiImageDO extends BaseDO {
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
|
@ -1,9 +1,7 @@
|
||||
package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
|
||||
|
||||
|
||||
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableField;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
@ -12,19 +10,20 @@ import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// TODO @xin:要不把 AiKnowledgeBaseDO 改成 AiKnowledgeDO。base 后缀感觉有点奇怪(让人以为是基类)。然后,我们很多地方的外键编号,都是 knowledgeId
|
||||
/**
|
||||
* AI 知识库 DO
|
||||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
@TableName(value = "ai_knowledge_base")
|
||||
@TableName(value = "ai_knowledge_base", autoResultMap = true)
|
||||
@Data
|
||||
public class AiKnowledgeBaseDO extends BaseDO {
|
||||
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
/**
|
||||
* 用户编号
|
||||
@ -40,6 +39,7 @@ public class AiKnowledgeBaseDO extends BaseDO {
|
||||
* 知识库描述
|
||||
*/
|
||||
private String description;
|
||||
// TODO @xin:如果全部可见,需要怎么设置?
|
||||
/**
|
||||
* 可见权限,只能选择哪些人可见
|
||||
*/
|
||||
|
@ -3,7 +3,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
|
||||
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
|
||||
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.Data;
|
||||
@ -20,10 +19,12 @@ public class AiKnowledgeDocumentDO extends BaseDO {
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
/**
|
||||
* 知识库编号
|
||||
*
|
||||
* 关联 {@link AiKnowledgeBaseDO#getId()}
|
||||
*/
|
||||
private Long knowledgeId;
|
||||
/**
|
||||
@ -39,7 +40,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
|
||||
*/
|
||||
private String url;
|
||||
/**
|
||||
* token数量
|
||||
* token 数量
|
||||
*/
|
||||
private Integer tokens;
|
||||
/**
|
||||
@ -59,4 +60,5 @@ public class AiKnowledgeDocumentDO extends BaseDO {
|
||||
* 枚举 {@link CommonStatusEnum}
|
||||
*/
|
||||
private Integer status;
|
||||
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.Data;
|
||||
@ -19,14 +18,17 @@ public class AiKnowledgeSegmentDO extends BaseDO {
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
/**
|
||||
* 向量库的id
|
||||
* 向量库的编号
|
||||
*/
|
||||
private String vectorId;
|
||||
// TODO @xin:加个 knowledgeId 字段,会方便点
|
||||
/**
|
||||
* 文档编号
|
||||
*
|
||||
* 关联 {@link AiKnowledgeDocumentDO#getId()}
|
||||
*/
|
||||
private Long documentId;
|
||||
/**
|
||||
@ -38,7 +40,7 @@ public class AiKnowledgeSegmentDO extends BaseDO {
|
||||
*/
|
||||
private Integer wordCount;
|
||||
/**
|
||||
* token数量
|
||||
* token 数量
|
||||
*/
|
||||
private Integer tokens;
|
||||
/**
|
||||
|
@ -19,7 +19,7 @@ public class AiMindMapDO extends BaseDO {
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
|
@ -25,7 +25,7 @@ public class AiMusicDO extends BaseDO {
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
|
@ -20,7 +20,7 @@ public class AiWriteDO extends BaseDO {
|
||||
/**
|
||||
* 编号
|
||||
*/
|
||||
@TableId(type = IdType.AUTO)
|
||||
@TableId
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
|
@ -17,11 +17,11 @@ public interface AiEmbeddingService {
|
||||
*/
|
||||
void add(List<Document> documents);
|
||||
|
||||
|
||||
/**
|
||||
* 相似查询
|
||||
*
|
||||
* @param request 查询实体
|
||||
*/
|
||||
List<Document> similaritySearch(SearchRequest request);
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// TODO @xin:是不是不用 AiEmbeddingServiceImpl,直接 vectorStore 注入到需要的地方就好啦。通过 KnowledgeDocumentService 返回就好。
|
||||
/**
|
||||
* AI 嵌入 Service 实现类
|
||||
*
|
||||
@ -30,4 +31,5 @@ public class AiEmbeddingServiceImpl implements AiEmbeddingService {
|
||||
public List<Document> similaritySearch(SearchRequest request) {
|
||||
return vectorStore.similaritySearch(request);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,6 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdat
|
||||
*/
|
||||
public interface AiKnowledgeBaseService {
|
||||
|
||||
|
||||
/**
|
||||
* 创建【我的】知识库
|
||||
*
|
||||
@ -27,4 +26,5 @@ public interface AiKnowledgeBaseService {
|
||||
* @param userId 用户编号
|
||||
*/
|
||||
void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId);
|
||||
|
||||
}
|
||||
|
@ -32,33 +32,36 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
@Resource
|
||||
private AiKnowledgeBaseMapper knowledgeBaseMapper;
|
||||
|
||||
|
||||
@Override
|
||||
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
|
||||
// TODO @xin:貌似直接调用 chatModalService.validateChatModel(id) 完事,不用搞个方法
|
||||
// 1. 校验模型配置
|
||||
AiChatModelDO model = validateChatModel(createReqVO.getModelId());
|
||||
|
||||
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class);
|
||||
knowledgeBaseDO.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
|
||||
|
||||
// 2. 插入知识库
|
||||
// TODO @xin:不用 DO 结尾
|
||||
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class)
|
||||
.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
|
||||
knowledgeBaseMapper.insert(knowledgeBaseDO);
|
||||
return knowledgeBaseDO.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) {
|
||||
|
||||
// 1.1 校验知识库存在
|
||||
AiKnowledgeBaseDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
|
||||
if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) {
|
||||
throw exception(KNOWLEDGE_NOT_EXISTS);
|
||||
}
|
||||
// 1.2 校验模型配置
|
||||
AiChatModelDO model = validateChatModel(updateReqVO.getModelId());
|
||||
|
||||
// 2. 更新知识库
|
||||
AiKnowledgeBaseDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class);
|
||||
updateDO.setModel(model.getModel());
|
||||
|
||||
knowledgeBaseMapper.updateById(updateDO);
|
||||
}
|
||||
|
||||
|
||||
private AiChatModelDO validateChatModel(Long id) {
|
||||
AiChatModelDO model = chatModalService.validateChatModel(id);
|
||||
Assert.notNull(model, "未找到对应嵌入模型");
|
||||
@ -72,4 +75,5 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
}
|
||||
return knowledgeBase;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,6 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocum
|
||||
*/
|
||||
public interface AiKnowledgeDocumentService {
|
||||
|
||||
|
||||
/**
|
||||
* 创建文档
|
||||
*
|
||||
|
@ -43,28 +43,30 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
||||
@Resource
|
||||
private AiEmbeddingService embeddingService;
|
||||
|
||||
// TODO @xin:@Resource 注入
|
||||
private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator();
|
||||
|
||||
// TODO xiaoxin 临时测试用,后续删
|
||||
@Value("classpath:/webapp/test/Fel.pdf")
|
||||
private org.springframework.core.io.Resource data;
|
||||
|
||||
|
||||
// TODO 芋艿:需要 review 下,代码格式;
|
||||
// TODO @xin:最好有 1、/2、/3 这种,让代码更有层次感
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
|
||||
|
||||
// TODO xiaoxin 后续从 url 加载
|
||||
TikaDocumentReader loader = new TikaDocumentReader(data);
|
||||
// 加载文档
|
||||
List<Document> documents = loader.get();
|
||||
Document document = CollUtil.getFirst(documents);
|
||||
// TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出?
|
||||
// TODO @xin:是不是不存在,就抛出异常呀;或者 return 呀;
|
||||
// TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出?回复:先直接算;
|
||||
Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
|
||||
Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
|
||||
|
||||
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class);
|
||||
documentDO.setTokens(tokens).setWordCount(wordCount)
|
||||
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
|
||||
.setTokens(tokens).setWordCount(wordCount)
|
||||
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
|
||||
// 文档记录入库
|
||||
documentMapper.insert(documentDO);
|
||||
@ -75,17 +77,15 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
||||
|
||||
// 文档分段
|
||||
List<Document> segments = tokenTextSplitter.apply(documents);
|
||||
|
||||
// 分段内容入库
|
||||
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
|
||||
segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
|
||||
.setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
|
||||
.setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
||||
// 分段内容入库
|
||||
segmentMapper.insertBatch(segmentDOList);
|
||||
|
||||
//向量化并存储
|
||||
// 向量化并存储
|
||||
embeddingService.add(segments);
|
||||
|
||||
return documentId;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,11 +1,10 @@
|
||||
package cn.iocoder.yudao.module.ai.service.knowledge;
|
||||
|
||||
/**
|
||||
* AI 知识库-分片 Service 接口
|
||||
* AI 知识库分片 Service 接口
|
||||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
public interface AiKnowledgeSegmentService {
|
||||
|
||||
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* AI 知识库-基础信息 Service 实现类
|
||||
* AI 知识库分片 Service 实现类
|
||||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
@ -12,5 +12,4 @@ import org.springframework.stereotype.Service;
|
||||
@Slf4j
|
||||
public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService {
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user