diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java index a0d0952a8..d4ca7ca49 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java @@ -29,7 +29,7 @@ public class AiKnowledgeSegmentController { @GetMapping("/page") @Operation(summary = "获取段落分页") - public CommonResult> getKnowledgeSegmentPageMy(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) { + public CommonResult> getKnowledgeSegmentPage(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) { PageResult pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO); return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class)); } diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java index d393fb672..df6b6821d 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java @@ -23,21 +23,21 @@ public class AiKnowledgeDocumentCreateReqVO { @URL(message = "文档 URL 格式不正确") private String url; - @Schema(description = "每个文本块的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800") - @NotNull(message = "每个文本块的目标 token 数不能为空") - private Integer defaultChunkSize; + @Schema(description = "每个段落的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800") + @NotNull(message = "每个段落的目标 token 数不能为空") + private Integer defaultSegmentTokens; - @Schema(description = "每个文本块的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350") - @NotNull(message = "每个文本块的最小字符数不能为空") - private Integer minChunkSizeChars; + @Schema(description = "每个段落的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350") + @NotNull(message = "每个段落的最小字符数不能为空") + private Integer minSegmentWordCount; - @Schema(description = "丢弃阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "5") + @Schema(description = "丢弃阈值:低于此阈值的段落会被丢弃", requiredMode = Schema.RequiredMode.REQUIRED, example = "5") @NotNull(message = "丢弃阈值不能为空") private Integer minChunkLengthToEmbed; - @Schema(description = "最大块数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000") - @NotNull(message = "最大块数不能为空") - private Integer maxNumChunks; + @Schema(description = "最大段落数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000") + @NotNull(message = "最大段落数不能为空") + private Integer maxNumSegments; @Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true") @NotNull(message = "分块是否保留分隔符不能为空") diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java index 5d7556235..1551b8ac8 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java @@ -38,9 +38,11 @@ public class AiKnowledgeDO extends BaseDO { * 知识库描述 */ private String description; - // TODO @新:如果全部可见,需要怎么设置? + /** - * 可见权限,只能选择哪些人可见 + * 可见权限,选择哪些人可见 + *

+ * -1 所有人可见,其他为各自用户编号 */ @TableField(typeHandler = JacksonTypeHandler.class) private List visibilityPermissions; diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java index 8b82a55db..297944611 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java @@ -40,23 +40,25 @@ public class AiKnowledgeDocumentDO extends BaseDO { */ private String url; /** - * token 数量 + * 文档 token 数量 */ private Integer tokens; /** - * 字符数 + * 文档字符数 */ private Integer wordCount; - // TODO @新:chunk 1)是不是 segment,这样命名保持一致会好点哈?2)Size 是不是改成 Tokens 会统一点;3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。 + + + // ========== 自定义分段所用参数 ========== + // TODO @新:3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。 /** * 每个文本块的目标 token 数 */ - private Integer defaultChunkSize; - // TODO @xin:SizeChars 和 wordCount 好像是一个意思,是不是也要统一哈。 + private Integer defaultSegmentTokens; /** * 每个文本块的最小字符数 */ - private Integer minChunkSizeChars; + private Integer minSegmentWordCount; /** * 低于此值的块会被丢弃 */ @@ -64,11 +66,13 @@ public class AiKnowledgeDocumentDO extends BaseDO { /** * 最大块数 */ - private Integer maxNumChunks; + private Integer maxNumSegments; /** * 分块是否保留分隔符 */ private Boolean keepSeparator; + // =================================== + /** * 切片状态 *

diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java index 70d85dc60..9bb3d3338 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java @@ -2,8 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; -import com.baomidou.mybatisplus.annotation.FieldStrategy; -import com.baomidou.mybatisplus.annotation.TableField; import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableName; import lombok.Data; @@ -27,7 +25,6 @@ public class AiKnowledgeSegmentDO extends BaseDO { /** * 向量库的编号 */ - @TableField(updateStrategy = FieldStrategy.ALWAYS) // TODO @新:尽量规避要这个注解。万一后面加个 status 单独更新,可能会踩坑。 private String vectorId; /** * 知识库编号 diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java index bda05989b..094f19b52 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java @@ -25,8 +25,7 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX selectList(List vectorIdList) { + default List selectListByVectorIds(List vectorIdList) { return selectList(new LambdaQueryWrapperX() .in(AiKnowledgeSegmentDO::getVectorId, vectorIdList) .orderByDesc(AiKnowledgeSegmentDO::getId)); diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java index 807509d2b..ff475f92c 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java @@ -71,8 +71,8 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic } // 2 构造文本分段器 - TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultChunkSize(), createReqVO.getMinChunkSizeChars(), createReqVO.getMinChunkLengthToEmbed(), - createReqVO.getMaxNumChunks(), createReqVO.getKeepSeparator()); + TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultSegmentTokens(), createReqVO.getMinSegmentWordCount(), createReqVO.getMinChunkLengthToEmbed(), + createReqVO.getMaxNumSegments(), createReqVO.getKeepSeparator()); // 2.1 文档分段 List segments = tokenTextSplitter.apply(documents); // 2.2 分段内容入库 diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java index e1386c936..5523fe278 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java @@ -90,7 +90,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService } else { // 2.2 禁用删除向量 vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId())); - knowledgeSegment.setVectorId(null); + knowledgeSegment.setVectorId(""); } // 3 更新段落状态 segmentMapper.updateById(knowledgeSegment); @@ -114,7 +114,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService return ListUtil.empty(); } // 3.2 段落召回 - return segmentMapper.selectList(CollUtil.getFieldValues(documentList, "id", String.class)); + return segmentMapper.selectListByVectorIds(CollUtil.getFieldValues(documentList, "id", String.class)); } /** diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java index 8cf07d91a..6ff878d19 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java @@ -47,13 +47,12 @@ public interface AiKnowledgeService { */ PageResult getKnowledgePageMy(Long userId, PageParam pageReqVO); - // TODO @新:knowledgeId 和 validateKnowledgeExists 的 id 是同一个么?如果是的话,建议变量也用 id 哈,然后两边的 id 注释,保持一致 /** * 根据知识库编号获取向量存储实例 * - * @param knowledgeId 知识库编号 + * @param id 知识库编号 * @return 向量存储实例 */ - VectorStore getVectorStoreById(Long knowledgeId); + VectorStore getVectorStoreById(Long id); } diff --git a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java index 7b0c37b5f..9269582ac 100644 --- a/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java +++ b/yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java @@ -29,21 +29,18 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_ @Slf4j public class AiKnowledgeServiceImpl implements AiKnowledgeService { - @Resource - private AiChatModelService chatModalService; - @Resource private AiKnowledgeMapper knowledgeMapper; + @Resource private AiChatModelService chatModelService; @Resource private AiApiKeyService apiKeyService; - // TODO @新:chatModelService 和 apiKeyService 可以放到 33 行的 chatModalService 后面。尽量保持,想通类型的变量在一块。例如说,Service 一块,Mapper 一块。 @Override public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) { // 1. 校验模型配置 - AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId()); + AiChatModelDO model = chatModelService.validateChatModel(createReqVO.getModelId()); // 2. 插入知识库 AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class) @@ -60,7 +57,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService { throw exception(KNOWLEDGE_NOT_EXISTS); } // 1.2 校验模型配置 - AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId()); + AiChatModelDO model = chatModelService.validateChatModel(updateReqVO.getModelId()); // 2. 更新知识库 AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class); @@ -83,8 +80,8 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService { } @Override - public VectorStore getVectorStoreById(Long knowledgeId) { - AiKnowledgeDO knowledge = validateKnowledgeExists(knowledgeId); + public VectorStore getVectorStoreById(Long id) { + AiKnowledgeDO knowledge = validateKnowledgeExists(id); AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId()); // 创建或获取 VectorStore 对象 return apiKeyService.getOrCreateVectorStore(model.getKeyId()); diff --git a/yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java b/yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java index a7f7aa2f6..7acd24769 100644 --- a/yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java +++ b/yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java @@ -197,7 +197,6 @@ public class AiModelFactoryImpl implements AiModelFactory { }); } - // TODO @新:貌似可以创建一个大的 VectorStore。然后搜的时候,通过 Filter.Expression 过滤对应的数据。 @Override public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) { String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);