mirror of
https://gitee.com/huangge1199_admin/vue-pro.git
synced 2024-12-02 12:11:54 +08:00
【解决todo】AI 知识库: 字段命名统一 补充注释
This commit is contained in:
parent
8e56b81a3a
commit
5cd870748d
@ -29,7 +29,7 @@ public class AiKnowledgeSegmentController {
|
|||||||
|
|
||||||
@GetMapping("/page")
|
@GetMapping("/page")
|
||||||
@Operation(summary = "获取段落分页")
|
@Operation(summary = "获取段落分页")
|
||||||
public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPageMy(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
|
public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPage(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
|
||||||
PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
|
PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
|
||||||
return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
|
return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
|
||||||
}
|
}
|
||||||
|
@ -23,21 +23,21 @@ public class AiKnowledgeDocumentCreateReqVO {
|
|||||||
@URL(message = "文档 URL 格式不正确")
|
@URL(message = "文档 URL 格式不正确")
|
||||||
private String url;
|
private String url;
|
||||||
|
|
||||||
@Schema(description = "每个文本块的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
|
@Schema(description = "每个段落的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
|
||||||
@NotNull(message = "每个文本块的目标 token 数不能为空")
|
@NotNull(message = "每个段落的目标 token 数不能为空")
|
||||||
private Integer defaultChunkSize;
|
private Integer defaultSegmentTokens;
|
||||||
|
|
||||||
@Schema(description = "每个文本块的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
|
@Schema(description = "每个段落的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
|
||||||
@NotNull(message = "每个文本块的最小字符数不能为空")
|
@NotNull(message = "每个段落的最小字符数不能为空")
|
||||||
private Integer minChunkSizeChars;
|
private Integer minSegmentWordCount;
|
||||||
|
|
||||||
@Schema(description = "丢弃阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
|
@Schema(description = "丢弃阈值:低于此阈值的段落会被丢弃", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
|
||||||
@NotNull(message = "丢弃阈值不能为空")
|
@NotNull(message = "丢弃阈值不能为空")
|
||||||
private Integer minChunkLengthToEmbed;
|
private Integer minChunkLengthToEmbed;
|
||||||
|
|
||||||
@Schema(description = "最大块数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
|
@Schema(description = "最大段落数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
|
||||||
@NotNull(message = "最大块数不能为空")
|
@NotNull(message = "最大段落数不能为空")
|
||||||
private Integer maxNumChunks;
|
private Integer maxNumSegments;
|
||||||
|
|
||||||
@Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
|
@Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
|
||||||
@NotNull(message = "分块是否保留分隔符不能为空")
|
@NotNull(message = "分块是否保留分隔符不能为空")
|
||||||
|
@ -38,9 +38,11 @@ public class AiKnowledgeDO extends BaseDO {
|
|||||||
* 知识库描述
|
* 知识库描述
|
||||||
*/
|
*/
|
||||||
private String description;
|
private String description;
|
||||||
// TODO @新:如果全部可见,需要怎么设置?
|
|
||||||
/**
|
/**
|
||||||
* 可见权限,只能选择哪些人可见
|
* 可见权限,选择哪些人可见
|
||||||
|
* <p>
|
||||||
|
* -1 所有人可见,其他为各自用户编号
|
||||||
*/
|
*/
|
||||||
@TableField(typeHandler = JacksonTypeHandler.class)
|
@TableField(typeHandler = JacksonTypeHandler.class)
|
||||||
private List<Long> visibilityPermissions;
|
private List<Long> visibilityPermissions;
|
||||||
|
@ -40,23 +40,25 @@ public class AiKnowledgeDocumentDO extends BaseDO {
|
|||||||
*/
|
*/
|
||||||
private String url;
|
private String url;
|
||||||
/**
|
/**
|
||||||
* token 数量
|
* 文档 token 数量
|
||||||
*/
|
*/
|
||||||
private Integer tokens;
|
private Integer tokens;
|
||||||
/**
|
/**
|
||||||
* 字符数
|
* 文档字符数
|
||||||
*/
|
*/
|
||||||
private Integer wordCount;
|
private Integer wordCount;
|
||||||
// TODO @新:chunk 1)是不是 segment,这样命名保持一致会好点哈?2)Size 是不是改成 Tokens 会统一点;3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
|
|
||||||
|
|
||||||
|
// ========== 自定义分段所用参数 ==========
|
||||||
|
// TODO @新:3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
|
||||||
/**
|
/**
|
||||||
* 每个文本块的目标 token 数
|
* 每个文本块的目标 token 数
|
||||||
*/
|
*/
|
||||||
private Integer defaultChunkSize;
|
private Integer defaultSegmentTokens;
|
||||||
// TODO @xin:SizeChars 和 wordCount 好像是一个意思,是不是也要统一哈。
|
|
||||||
/**
|
/**
|
||||||
* 每个文本块的最小字符数
|
* 每个文本块的最小字符数
|
||||||
*/
|
*/
|
||||||
private Integer minChunkSizeChars;
|
private Integer minSegmentWordCount;
|
||||||
/**
|
/**
|
||||||
* 低于此值的块会被丢弃
|
* 低于此值的块会被丢弃
|
||||||
*/
|
*/
|
||||||
@ -64,11 +66,13 @@ public class AiKnowledgeDocumentDO extends BaseDO {
|
|||||||
/**
|
/**
|
||||||
* 最大块数
|
* 最大块数
|
||||||
*/
|
*/
|
||||||
private Integer maxNumChunks;
|
private Integer maxNumSegments;
|
||||||
/**
|
/**
|
||||||
* 分块是否保留分隔符
|
* 分块是否保留分隔符
|
||||||
*/
|
*/
|
||||||
private Boolean keepSeparator;
|
private Boolean keepSeparator;
|
||||||
|
// ===================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 切片状态
|
* 切片状态
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -2,8 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
|
|||||||
|
|
||||||
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
||||||
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
|
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
|
||||||
import com.baomidou.mybatisplus.annotation.FieldStrategy;
|
|
||||||
import com.baomidou.mybatisplus.annotation.TableField;
|
|
||||||
import com.baomidou.mybatisplus.annotation.TableId;
|
import com.baomidou.mybatisplus.annotation.TableId;
|
||||||
import com.baomidou.mybatisplus.annotation.TableName;
|
import com.baomidou.mybatisplus.annotation.TableName;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
@ -27,7 +25,6 @@ public class AiKnowledgeSegmentDO extends BaseDO {
|
|||||||
/**
|
/**
|
||||||
* 向量库的编号
|
* 向量库的编号
|
||||||
*/
|
*/
|
||||||
@TableField(updateStrategy = FieldStrategy.ALWAYS) // TODO @新:尽量规避要这个注解。万一后面加个 status 单独更新,可能会踩坑。
|
|
||||||
private String vectorId;
|
private String vectorId;
|
||||||
/**
|
/**
|
||||||
* 知识库编号
|
* 知识库编号
|
||||||
|
@ -25,8 +25,7 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegment
|
|||||||
.orderByDesc(AiKnowledgeSegmentDO::getId));
|
.orderByDesc(AiKnowledgeSegmentDO::getId));
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO @新:selectListByXXX 哈
|
default List<AiKnowledgeSegmentDO> selectListByVectorIds(List<String> vectorIdList) {
|
||||||
default List<AiKnowledgeSegmentDO> selectList(List<String> vectorIdList) {
|
|
||||||
return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
|
return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
|
||||||
.in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
|
.in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
|
||||||
.orderByDesc(AiKnowledgeSegmentDO::getId));
|
.orderByDesc(AiKnowledgeSegmentDO::getId));
|
||||||
|
@ -71,8 +71,8 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 2 构造文本分段器
|
// 2 构造文本分段器
|
||||||
TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultChunkSize(), createReqVO.getMinChunkSizeChars(), createReqVO.getMinChunkLengthToEmbed(),
|
TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultSegmentTokens(), createReqVO.getMinSegmentWordCount(), createReqVO.getMinChunkLengthToEmbed(),
|
||||||
createReqVO.getMaxNumChunks(), createReqVO.getKeepSeparator());
|
createReqVO.getMaxNumSegments(), createReqVO.getKeepSeparator());
|
||||||
// 2.1 文档分段
|
// 2.1 文档分段
|
||||||
List<Document> segments = tokenTextSplitter.apply(documents);
|
List<Document> segments = tokenTextSplitter.apply(documents);
|
||||||
// 2.2 分段内容入库
|
// 2.2 分段内容入库
|
||||||
|
@ -90,7 +90,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
|
|||||||
} else {
|
} else {
|
||||||
// 2.2 禁用删除向量
|
// 2.2 禁用删除向量
|
||||||
vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
|
vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
|
||||||
knowledgeSegment.setVectorId(null);
|
knowledgeSegment.setVectorId("");
|
||||||
}
|
}
|
||||||
// 3 更新段落状态
|
// 3 更新段落状态
|
||||||
segmentMapper.updateById(knowledgeSegment);
|
segmentMapper.updateById(knowledgeSegment);
|
||||||
@ -114,7 +114,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
|
|||||||
return ListUtil.empty();
|
return ListUtil.empty();
|
||||||
}
|
}
|
||||||
// 3.2 段落召回
|
// 3.2 段落召回
|
||||||
return segmentMapper.selectList(CollUtil.getFieldValues(documentList, "id", String.class));
|
return segmentMapper.selectListByVectorIds(CollUtil.getFieldValues(documentList, "id", String.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -47,13 +47,12 @@ public interface AiKnowledgeService {
|
|||||||
*/
|
*/
|
||||||
PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
|
PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
|
||||||
|
|
||||||
// TODO @新:knowledgeId 和 validateKnowledgeExists 的 id 是同一个么?如果是的话,建议变量也用 id 哈,然后两边的 id 注释,保持一致
|
|
||||||
/**
|
/**
|
||||||
* 根据知识库编号获取向量存储实例
|
* 根据知识库编号获取向量存储实例
|
||||||
*
|
*
|
||||||
* @param knowledgeId 知识库编号
|
* @param id 知识库编号
|
||||||
* @return 向量存储实例
|
* @return 向量存储实例
|
||||||
*/
|
*/
|
||||||
VectorStore getVectorStoreById(Long knowledgeId);
|
VectorStore getVectorStoreById(Long id);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -29,21 +29,18 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
public class AiKnowledgeServiceImpl implements AiKnowledgeService {
|
public class AiKnowledgeServiceImpl implements AiKnowledgeService {
|
||||||
|
|
||||||
@Resource
|
|
||||||
private AiChatModelService chatModalService;
|
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private AiKnowledgeMapper knowledgeMapper;
|
private AiKnowledgeMapper knowledgeMapper;
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private AiChatModelService chatModelService;
|
private AiChatModelService chatModelService;
|
||||||
@Resource
|
@Resource
|
||||||
private AiApiKeyService apiKeyService;
|
private AiApiKeyService apiKeyService;
|
||||||
// TODO @新:chatModelService 和 apiKeyService 可以放到 33 行的 chatModalService 后面。尽量保持,想通类型的变量在一块。例如说,Service 一块,Mapper 一块。
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
|
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
|
||||||
// 1. 校验模型配置
|
// 1. 校验模型配置
|
||||||
AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
|
AiChatModelDO model = chatModelService.validateChatModel(createReqVO.getModelId());
|
||||||
|
|
||||||
// 2. 插入知识库
|
// 2. 插入知识库
|
||||||
AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
|
AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
|
||||||
@ -60,7 +57,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
|
|||||||
throw exception(KNOWLEDGE_NOT_EXISTS);
|
throw exception(KNOWLEDGE_NOT_EXISTS);
|
||||||
}
|
}
|
||||||
// 1.2 校验模型配置
|
// 1.2 校验模型配置
|
||||||
AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
|
AiChatModelDO model = chatModelService.validateChatModel(updateReqVO.getModelId());
|
||||||
|
|
||||||
// 2. 更新知识库
|
// 2. 更新知识库
|
||||||
AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
|
AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
|
||||||
@ -83,8 +80,8 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorStore getVectorStoreById(Long knowledgeId) {
|
public VectorStore getVectorStoreById(Long id) {
|
||||||
AiKnowledgeDO knowledge = validateKnowledgeExists(knowledgeId);
|
AiKnowledgeDO knowledge = validateKnowledgeExists(id);
|
||||||
AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
|
AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
|
||||||
// 创建或获取 VectorStore 对象
|
// 创建或获取 VectorStore 对象
|
||||||
return apiKeyService.getOrCreateVectorStore(model.getKeyId());
|
return apiKeyService.getOrCreateVectorStore(model.getKeyId());
|
||||||
|
@ -197,7 +197,6 @@ public class AiModelFactoryImpl implements AiModelFactory {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO @新:貌似可以创建一个大的 VectorStore。然后搜的时候,通过 Filter.Expression 过滤对应的数据。
|
|
||||||
@Override
|
@Override
|
||||||
public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
|
public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
|
||||||
String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);
|
String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);
|
||||||
|
Loading…
Reference in New Issue
Block a user