【解决todo】AI 知识库: 字段命名统一 补充注释

This commit is contained in:
xiaoxin 2024-09-22 15:20:55 +08:00
parent 8e56b81a3a
commit 5cd870748d
11 changed files with 38 additions and 41 deletions

View File

@ -29,7 +29,7 @@ public class AiKnowledgeSegmentController {
@GetMapping("/page")
@Operation(summary = "获取段落分页")
public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPageMy(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPage(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
}

View File

@ -23,21 +23,21 @@ public class AiKnowledgeDocumentCreateReqVO {
@URL(message = "文档 URL 格式不正确")
private String url;
@Schema(description = "每个文本块的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
@NotNull(message = "每个文本块的目标 token 数不能为空")
private Integer defaultChunkSize;
@Schema(description = "每个段落的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
@NotNull(message = "每个段落的目标 token 数不能为空")
private Integer defaultSegmentTokens;
@Schema(description = "每个文本块的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
@NotNull(message = "每个文本块的最小字符数不能为空")
private Integer minChunkSizeChars;
@Schema(description = "每个段落的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
@NotNull(message = "每个段落的最小字符数不能为空")
private Integer minSegmentWordCount;
@Schema(description = "丢弃阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
@Schema(description = "丢弃阈值:低于此阈值的段落会被丢弃", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
@NotNull(message = "丢弃阈值不能为空")
private Integer minChunkLengthToEmbed;
@Schema(description = "最大", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
@NotNull(message = "最大数不能为空")
private Integer maxNumChunks;
@Schema(description = "最大段落", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
@NotNull(message = "最大段落数不能为空")
private Integer maxNumSegments;
@Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
@NotNull(message = "分块是否保留分隔符不能为空")

View File

@ -38,9 +38,11 @@ public class AiKnowledgeDO extends BaseDO {
* 知识库描述
*/
private String description;
// TODO @新:如果全部可见,需要怎么设置?
/**
* 可见权限,只能选择哪些人可见
* 可见权限,选择哪些人可见
* <p>
* -1 表示所有人可见,其他值为各自的用户编号
*/
@TableField(typeHandler = JacksonTypeHandler.class)
private List<Long> visibilityPermissions;

View File

@ -40,23 +40,25 @@ public class AiKnowledgeDocumentDO extends BaseDO {
*/
private String url;
/**
* token 数量
* 文档 token 数量
*/
private Integer tokens;
/**
* 字符数
* 文档字符数
*/
private Integer wordCount;
// TODO @新:chunk 1)是不是 segment,这样命名保持一致会好点哈?2)Size 是不是改成 Tokens 会统一点?3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下,尽量命名保持风格统一哈
// ========== 自定义分段所用参数 ==========
// TODO @新:3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下,尽量命名保持风格统一哈
/**
* 每个文本块的目标 token
*/
private Integer defaultChunkSize;
// TODO @xin:SizeChars 和 wordCount 好像是一个意思,是不是也要统一哈?
private Integer defaultSegmentTokens;
/**
* 每个文本块的最小字符数
*/
private Integer minChunkSizeChars;
private Integer minSegmentWordCount;
/**
* 低于此值的块会被丢弃
*/
@ -64,11 +66,13 @@ public class AiKnowledgeDocumentDO extends BaseDO {
/**
* 最大块数
*/
private Integer maxNumChunks;
private Integer maxNumSegments;
/**
* 分块是否保留分隔符
*/
private Boolean keepSeparator;
// ===================================
/**
* 切片状态
* <p>

View File

@ -2,8 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
import com.baomidou.mybatisplus.annotation.FieldStrategy;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
@ -27,7 +25,6 @@ public class AiKnowledgeSegmentDO extends BaseDO {
/**
* 向量库的编号
*/
@TableField(updateStrategy = FieldStrategy.ALWAYS) // TODO @新:尽量规避要这个注解,万一后面加个 status 单独更新,可能会踩坑
private String vectorId;
/**
* 知识库编号

View File

@ -25,8 +25,7 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegment
.orderByDesc(AiKnowledgeSegmentDO::getId));
}
// TODO @新:selectListByXXX
default List<AiKnowledgeSegmentDO> selectList(List<String> vectorIdList) {
default List<AiKnowledgeSegmentDO> selectListByVectorIds(List<String> vectorIdList) {
return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
.in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
.orderByDesc(AiKnowledgeSegmentDO::getId));

View File

@ -71,8 +71,8 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
}
// 2 构造文本分段器
TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultChunkSize(), createReqVO.getMinChunkSizeChars(), createReqVO.getMinChunkLengthToEmbed(),
createReqVO.getMaxNumChunks(), createReqVO.getKeepSeparator());
TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultSegmentTokens(), createReqVO.getMinSegmentWordCount(), createReqVO.getMinChunkLengthToEmbed(),
createReqVO.getMaxNumSegments(), createReqVO.getKeepSeparator());
// 2.1 文档分段
List<Document> segments = tokenTextSplitter.apply(documents);
// 2.2 分段内容入库

View File

@ -90,7 +90,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
} else {
// 2.2 禁用删除向量
vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
knowledgeSegment.setVectorId(null);
knowledgeSegment.setVectorId("");
}
// 3 更新段落状态
segmentMapper.updateById(knowledgeSegment);
@ -114,7 +114,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
return ListUtil.empty();
}
// 3.2 段落召回
return segmentMapper.selectList(CollUtil.getFieldValues(documentList, "id", String.class));
return segmentMapper.selectListByVectorIds(CollUtil.getFieldValues(documentList, "id", String.class));
}
/**

View File

@ -47,13 +47,12 @@ public interface AiKnowledgeService {
*/
PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
// TODO @新:knowledgeId 和 validateKnowledgeExists 的 id 是同一个么?如果是的话,建议变量也用 id,然后两边的 id 注释保持一致
/**
* 根据知识库编号获取向量存储实例
*
* @param knowledgeId 知识库编号
* @param id 知识库编号
* @return 向量存储实例
*/
VectorStore getVectorStoreById(Long knowledgeId);
VectorStore getVectorStoreById(Long id);
}

View File

@ -29,21 +29,18 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
@Slf4j
public class AiKnowledgeServiceImpl implements AiKnowledgeService {
@Resource
private AiChatModelService chatModalService;
@Resource
private AiKnowledgeMapper knowledgeMapper;
@Resource
private AiChatModelService chatModelService;
@Resource
private AiApiKeyService apiKeyService;
// TODO @新:chatModelService、apiKeyService 可以放到 33 行的 chatModalService 后面;尽量保持相同类型的变量在一块,例如说:Service 一块,Mapper 一块
@Override
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
// 1. 校验模型配置
AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
AiChatModelDO model = chatModelService.validateChatModel(createReqVO.getModelId());
// 2. 插入知识库
AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
@ -60,7 +57,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
throw exception(KNOWLEDGE_NOT_EXISTS);
}
// 1.2 校验模型配置
AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
AiChatModelDO model = chatModelService.validateChatModel(updateReqVO.getModelId());
// 2. 更新知识库
AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
@ -83,8 +80,8 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
}
@Override
public VectorStore getVectorStoreById(Long knowledgeId) {
AiKnowledgeDO knowledge = validateKnowledgeExists(knowledgeId);
public VectorStore getVectorStoreById(Long id) {
AiKnowledgeDO knowledge = validateKnowledgeExists(id);
AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
// 创建或获取 VectorStore 对象
return apiKeyService.getOrCreateVectorStore(model.getKeyId());

View File

@ -197,7 +197,6 @@ public class AiModelFactoryImpl implements AiModelFactory {
});
}
// TODO @新:貌似可以创建一个大的 VectorStore,然后搜的时候,通过 Filter.Expression 过滤对应的数据
@Override
public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);