mirror of
https://gitee.com/huangge1199_admin/vue-pro.git
synced 2024-11-23 07:41:53 +08:00
commit
f8d1fa9b49
@ -0,0 +1,16 @@
|
|||||||
|
package cn.iocoder.yudao.module.ai.service.knowledge;
|
||||||
|
|
||||||
|
/**
 * AI knowledge base Service interface.
 *
 * @author xiaoxin
 */
public interface DocService {

    /**
     * Vectorizes (embeds) a document.
     */
    void embeddingDoc();

}
|
@ -0,0 +1,44 @@
|
|||||||
|
package cn.iocoder.yudao.module.ai.service.knowledge;
|
||||||
|
|
||||||
|
import jakarta.annotation.Resource;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
|
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
||||||
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||||
|
import org.springframework.ai.vectorstore.RedisVectorStore;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AI 知识库 Service 实现类
|
||||||
|
*
|
||||||
|
* @author xiaoxin
|
||||||
|
*/
|
||||||
|
@Service
|
||||||
|
@Slf4j
|
||||||
|
public class DocServiceImpl implements DocService {
|
||||||
|
|
||||||
|
@Resource
|
||||||
|
RedisVectorStore vectorStore;
|
||||||
|
@Resource
|
||||||
|
TokenTextSplitter tokenTextSplitter;
|
||||||
|
|
||||||
|
// TODO @xin 临时测试用,后续删
|
||||||
|
@Value("classpath:/webapp/test/Fel.pdf")
|
||||||
|
private org.springframework.core.io.Resource data;
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void embeddingDoc() {
|
||||||
|
// 读取文件
|
||||||
|
org.springframework.core.io.Resource file = data;
|
||||||
|
TikaDocumentReader loader = new TikaDocumentReader(file);
|
||||||
|
List<Document> documents = loader.get();
|
||||||
|
// 文档分段
|
||||||
|
List<Document> segments = tokenTextSplitter.apply(documents);
|
||||||
|
// 向量化并存储
|
||||||
|
vectorStore.add(segments);
|
||||||
|
}
|
||||||
|
}
|
@ -40,6 +40,28 @@
|
|||||||
<version>${spring-ai.version}</version>
|
<version>${spring-ai.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-transformers-spring-boot-starter</artifactId>
|
||||||
|
<version>${spring-ai.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-tika-document-reader</artifactId>
|
||||||
|
<version>${spring-ai.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-redis-store</artifactId>
|
||||||
|
<version>${spring-ai.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.data</groupId>
|
||||||
|
<artifactId>spring-data-redis</artifactId>
|
||||||
|
<optional>true</optional>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>cn.iocoder.boot</groupId>
|
<groupId>cn.iocoder.boot</groupId>
|
||||||
<artifactId>yudao-common</artifactId>
|
<artifactId>yudao-common</artifactId>
|
||||||
|
@ -10,11 +10,18 @@ import cn.iocoder.yudao.framework.ai.core.model.xinghuo.XingHuoChatModel;
|
|||||||
import cn.iocoder.yudao.framework.ai.core.model.xinghuo.XingHuoChatOptions;
|
import cn.iocoder.yudao.framework.ai.core.model.xinghuo.XingHuoChatOptions;
|
||||||
import com.alibaba.cloud.ai.tongyi.TongYiAutoConfiguration;
|
import com.alibaba.cloud.ai.tongyi.TongYiAutoConfiguration;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.ai.autoconfigure.vectorstore.redis.RedisVectorStoreProperties;
|
||||||
|
import org.springframework.ai.document.MetadataMode;
|
||||||
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||||
|
import org.springframework.ai.transformers.TransformersEmbeddingModel;
|
||||||
|
import org.springframework.ai.vectorstore.RedisVectorStore;
|
||||||
import org.springframework.boot.autoconfigure.AutoConfiguration;
|
import org.springframework.boot.autoconfigure.AutoConfiguration;
|
||||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
import org.springframework.boot.autoconfigure.data.redis.RedisProperties;
|
||||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Import;
|
import org.springframework.context.annotation.Import;
|
||||||
|
import redis.clients.jedis.JedisPooled;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 芋道 AI 自动配置
|
* 芋道 AI 自动配置
|
||||||
@ -73,4 +80,33 @@ public class YudaoAiAutoConfiguration {
|
|||||||
return new SunoApi(yudaoAiProperties.getSuno().getBaseUrl());
|
return new SunoApi(yudaoAiProperties.getSuno().getBaseUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ========== rag 相关 ==========
|
||||||
|
@Bean
|
||||||
|
public TransformersEmbeddingModel transformersEmbeddingClient() {
|
||||||
|
return new TransformersEmbeddingModel(MetadataMode.EMBED);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 我们启动有加载很多 Embedding 模型,不晓得取哪个好,先 new 个 TransformersEmbeddingModel 跑
|
||||||
|
*/
|
||||||
|
@Bean
|
||||||
|
public RedisVectorStore vectorStore(TransformersEmbeddingModel transformersEmbeddingModel, RedisVectorStoreProperties properties,
|
||||||
|
RedisProperties redisProperties) {
|
||||||
|
var config = RedisVectorStore.RedisVectorStoreConfig.builder()
|
||||||
|
.withIndexName(properties.getIndex())
|
||||||
|
.withPrefix(properties.getPrefix())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
RedisVectorStore redisVectorStore = new RedisVectorStore(config, transformersEmbeddingModel,
|
||||||
|
new JedisPooled(redisProperties.getHost(), redisProperties.getPort()),
|
||||||
|
properties.isInitializeSchema());
|
||||||
|
redisVectorStore.afterPropertiesSet();
|
||||||
|
return redisVectorStore;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public TokenTextSplitter tokenTextSplitter() {
|
||||||
|
return new TokenTextSplitter(500, 100, 5, 10000, true);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2023 - 2024 the original author or authors.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.springframework.ai.autoconfigure.vectorstore.redis;
|
||||||
|
|
||||||
|
import org.springframework.ai.embedding.EmbeddingModel;
|
||||||
|
import org.springframework.ai.vectorstore.RedisVectorStore;
|
||||||
|
import org.springframework.ai.vectorstore.RedisVectorStore.RedisVectorStoreConfig;
|
||||||
|
import org.springframework.boot.autoconfigure.AutoConfiguration;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||||
|
import org.springframework.boot.autoconfigure.data.redis.RedisAutoConfiguration;
|
||||||
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
|
||||||
|
import redis.clients.jedis.JedisPooled;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO @xin 先拿 spring-ai 最新代码覆盖,1.0.0-M1 跟 redis 自动配置会冲突
|
||||||
|
*
|
||||||
|
* @author Christian Tzolov
|
||||||
|
* @author Eddú Meléndez
|
||||||
|
*/
|
||||||
|
@AutoConfiguration(after = RedisAutoConfiguration.class)
|
||||||
|
@ConditionalOnClass({JedisPooled.class, JedisConnectionFactory.class, RedisVectorStore.class, EmbeddingModel.class})
|
||||||
|
//@ConditionalOnBean(JedisConnectionFactory.class)
|
||||||
|
@EnableConfigurationProperties(RedisVectorStoreProperties.class)
|
||||||
|
public class RedisVectorStoreAutoConfiguration {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@ConditionalOnMissingBean
|
||||||
|
public RedisVectorStore vectorStore(EmbeddingModel embeddingModel, RedisVectorStoreProperties properties,
|
||||||
|
JedisConnectionFactory jedisConnectionFactory) {
|
||||||
|
|
||||||
|
var config = RedisVectorStoreConfig.builder()
|
||||||
|
.withIndexName(properties.getIndex())
|
||||||
|
.withPrefix(properties.getPrefix())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
return new RedisVectorStore(config, embeddingModel,
|
||||||
|
new JedisPooled(jedisConnectionFactory.getHostName(), jedisConnectionFactory.getPort()),
|
||||||
|
properties.isInitializeSchema());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,456 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2023 - 2024 the original author or authors.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.springframework.ai.vectorstore;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
|
import org.springframework.ai.embedding.EmbeddingModel;
|
||||||
|
import org.springframework.ai.vectorstore.filter.FilterExpressionConverter;
|
||||||
|
import org.springframework.beans.factory.InitializingBean;
|
||||||
|
import org.springframework.util.Assert;
|
||||||
|
import org.springframework.util.CollectionUtils;
|
||||||
|
import redis.clients.jedis.JedisPooled;
|
||||||
|
import redis.clients.jedis.Pipeline;
|
||||||
|
import redis.clients.jedis.json.Path2;
|
||||||
|
import redis.clients.jedis.search.*;
|
||||||
|
import redis.clients.jedis.search.Schema.FieldType;
|
||||||
|
import redis.clients.jedis.search.schemafields.*;
|
||||||
|
import redis.clients.jedis.search.schemafields.VectorField.VectorAlgorithm;
|
||||||
|
|
||||||
|
import java.text.MessageFormat;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The RedisVectorStore is for managing and querying vector data in a Redis database. It
|
||||||
|
* offers functionalities like adding, deleting, and performing similarity searches on
|
||||||
|
* documents.
|
||||||
|
*
|
||||||
|
* The store utilizes RedisJSON and RedisSearch to handle JSON documents and to index and
|
||||||
|
* search vector data. It supports various vector algorithms (e.g., FLAT, HSNW) for
|
||||||
|
* efficient similarity searches. Additionally, it allows for custom metadata fields in
|
||||||
|
* the documents to be stored alongside the vector and content data.
|
||||||
|
*
|
||||||
|
* This class requires a RedisVectorStoreConfig configuration object for initialization,
|
||||||
|
* which includes settings like Redis URI, index name, field names, and vector algorithms.
|
||||||
|
* It also requires an EmbeddingModel to convert documents into embeddings before storing
|
||||||
|
* them.
|
||||||
|
*
|
||||||
|
* @author Julien Ruaux
|
||||||
|
* @author Christian Tzolov
|
||||||
|
* @author Eddú Meléndez
|
||||||
|
* @see VectorStore
|
||||||
|
* @see RedisVectorStoreConfig
|
||||||
|
* @see EmbeddingModel
|
||||||
|
*/
|
||||||
|
public class RedisVectorStore implements VectorStore, InitializingBean {
|
||||||
|
|
||||||
|
/**
 * Vector index algorithms that can be selected for the store.
 */
public enum Algorithm {

    /** Mapped to Jedis' {@code VectorAlgorithm.FLAT}. */
    FLAT,

    /** Spelled {@code HSNW} here; mapped to Jedis' {@code VectorAlgorithm.HNSW}. */
    HSNW

}
|
||||||
|
|
||||||
|
public record MetadataField(String name, FieldType fieldType) {
|
||||||
|
|
||||||
|
public static MetadataField text(String name) {
|
||||||
|
return new MetadataField(name, FieldType.TEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MetadataField numeric(String name) {
|
||||||
|
return new MetadataField(name, FieldType.NUMERIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MetadataField tag(String name) {
|
||||||
|
return new MetadataField(name, FieldType.TAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration for the Redis vector store.
|
||||||
|
*/
|
||||||
|
public static final class RedisVectorStoreConfig {
|
||||||
|
|
||||||
|
private final String indexName;
|
||||||
|
|
||||||
|
private final String prefix;
|
||||||
|
|
||||||
|
private final String contentFieldName;
|
||||||
|
|
||||||
|
private final String embeddingFieldName;
|
||||||
|
|
||||||
|
private final Algorithm vectorAlgorithm;
|
||||||
|
|
||||||
|
private final List<MetadataField> metadataFields;
|
||||||
|
|
||||||
|
private RedisVectorStoreConfig() {
|
||||||
|
this(builder());
|
||||||
|
}
|
||||||
|
|
||||||
|
private RedisVectorStoreConfig(Builder builder) {
|
||||||
|
this.indexName = builder.indexName;
|
||||||
|
this.prefix = builder.prefix;
|
||||||
|
this.contentFieldName = builder.contentFieldName;
|
||||||
|
this.embeddingFieldName = builder.embeddingFieldName;
|
||||||
|
this.vectorAlgorithm = builder.vectorAlgorithm;
|
||||||
|
this.metadataFields = builder.metadataFields;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start building a new configuration.
|
||||||
|
* @return The entry point for creating a new configuration.
|
||||||
|
*/
|
||||||
|
public static Builder builder() {
|
||||||
|
|
||||||
|
return new Builder();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@return the default config}
|
||||||
|
*/
|
||||||
|
public static RedisVectorStoreConfig defaultConfig() {
|
||||||
|
|
||||||
|
return builder().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Builder {
|
||||||
|
|
||||||
|
private String indexName = DEFAULT_INDEX_NAME;
|
||||||
|
|
||||||
|
private String prefix = DEFAULT_PREFIX;
|
||||||
|
|
||||||
|
private String contentFieldName = DEFAULT_CONTENT_FIELD_NAME;
|
||||||
|
|
||||||
|
private String embeddingFieldName = DEFAULT_EMBEDDING_FIELD_NAME;
|
||||||
|
|
||||||
|
private Algorithm vectorAlgorithm = DEFAULT_VECTOR_ALGORITHM;
|
||||||
|
|
||||||
|
private List<MetadataField> metadataFields = new ArrayList<>();
|
||||||
|
|
||||||
|
private Builder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configures the Redis index name to use.
|
||||||
|
* @param name the index name to use
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
public Builder withIndexName(String name) {
|
||||||
|
this.indexName = name;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configures the Redis key prefix to use (default: "embedding:").
|
||||||
|
* @param prefix the prefix to use
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
public Builder withPrefix(String prefix) {
|
||||||
|
this.prefix = prefix;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configures the Redis content field name to use.
|
||||||
|
* @param name the content field name to use
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
public Builder withContentFieldName(String name) {
|
||||||
|
this.contentFieldName = name;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configures the Redis embedding field name to use.
|
||||||
|
* @param name the embedding field name to use
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
public Builder withEmbeddingFieldName(String name) {
|
||||||
|
this.embeddingFieldName = name;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configures the Redis vector algorithmto use.
|
||||||
|
* @param algorithm the vector algorithm to use
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
public Builder withVectorAlgorithm(Algorithm algorithm) {
|
||||||
|
this.vectorAlgorithm = algorithm;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder withMetadataFields(MetadataField... fields) {
|
||||||
|
return withMetadataFields(Arrays.asList(fields));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder withMetadataFields(List<MetadataField> fields) {
|
||||||
|
this.metadataFields = fields;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@return the immutable configuration}
|
||||||
|
*/
|
||||||
|
public RedisVectorStoreConfig build() {
|
||||||
|
|
||||||
|
return new RedisVectorStoreConfig(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private final boolean initializeSchema;
|
||||||
|
|
||||||
|
public static final String DEFAULT_INDEX_NAME = "spring-ai-index";
|
||||||
|
|
||||||
|
public static final String DEFAULT_CONTENT_FIELD_NAME = "content";
|
||||||
|
|
||||||
|
public static final String DEFAULT_EMBEDDING_FIELD_NAME = "embedding";
|
||||||
|
|
||||||
|
public static final String DEFAULT_PREFIX = "embedding:";
|
||||||
|
|
||||||
|
public static final Algorithm DEFAULT_VECTOR_ALGORITHM = Algorithm.HSNW;
|
||||||
|
|
||||||
|
private static final String QUERY_FORMAT = "%s=>[KNN %s @%s $%s AS %s]";
|
||||||
|
|
||||||
|
private static final Path2 JSON_SET_PATH = Path2.of("$");
|
||||||
|
|
||||||
|
private static final String JSON_PATH_PREFIX = "$.";
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(RedisVectorStore.class);
|
||||||
|
|
||||||
|
private static final Predicate<Object> RESPONSE_OK = Predicate.isEqual("OK");
|
||||||
|
|
||||||
|
private static final Predicate<Object> RESPONSE_DEL_OK = Predicate.isEqual(1l);
|
||||||
|
|
||||||
|
private static final String VECTOR_TYPE_FLOAT32 = "FLOAT32";
|
||||||
|
|
||||||
|
private static final String EMBEDDING_PARAM_NAME = "BLOB";
|
||||||
|
|
||||||
|
public static final String DISTANCE_FIELD_NAME = "vector_score";
|
||||||
|
|
||||||
|
private static final String DEFAULT_DISTANCE_METRIC = "COSINE";
|
||||||
|
|
||||||
|
private final JedisPooled jedis;
|
||||||
|
|
||||||
|
private final EmbeddingModel embeddingModel;
|
||||||
|
|
||||||
|
private final RedisVectorStoreConfig config;
|
||||||
|
|
||||||
|
private FilterExpressionConverter filterExpressionConverter;
|
||||||
|
|
||||||
|
/**
 * Creates a vector store on top of the given Jedis client.
 *
 * @param config           store configuration; must not be {@code null}
 * @param embeddingModel   model used to embed documents and queries; must not be {@code null}
 * @param jedis            Redis client used for every operation; must not be {@code null}
 * @param initializeSchema whether {@link #afterPropertiesSet()} should create the index when it is missing
 * @throws IllegalArgumentException if {@code config}, {@code embeddingModel} or {@code jedis} is {@code null}
 */
public RedisVectorStore(RedisVectorStoreConfig config, EmbeddingModel embeddingModel, JedisPooled jedis,
        boolean initializeSchema) {

    Assert.notNull(config, "Config must not be null");
    Assert.notNull(embeddingModel, "Embedding model must not be null");
    // Fail fast here rather than with a NullPointerException on first use of the store.
    Assert.notNull(jedis, "Jedis client must not be null");
    this.initializeSchema = initializeSchema;

    this.jedis = jedis;
    this.embeddingModel = embeddingModel;
    this.config = config;
    this.filterExpressionConverter = new RedisFilterExpressionConverter(this.config.metadataFields);
}
|
||||||
|
|
||||||
|
public JedisPooled getJedis() {
|
||||||
|
return this.jedis;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(List<Document> documents) {
|
||||||
|
try (Pipeline pipeline = this.jedis.pipelined()) {
|
||||||
|
for (Document document : documents) {
|
||||||
|
var embedding = this.embeddingModel.embed(document);
|
||||||
|
document.setEmbedding(embedding);
|
||||||
|
|
||||||
|
var fields = new HashMap<String, Object>();
|
||||||
|
fields.put(this.config.embeddingFieldName, embedding);
|
||||||
|
fields.put(this.config.contentFieldName, document.getContent());
|
||||||
|
fields.putAll(document.getMetadata());
|
||||||
|
pipeline.jsonSetWithEscape(key(document.getId()), JSON_SET_PATH, fields);
|
||||||
|
}
|
||||||
|
List<Object> responses = pipeline.syncAndReturnAll();
|
||||||
|
Optional<Object> errResponse = responses.stream().filter(Predicate.not(RESPONSE_OK)).findAny();
|
||||||
|
if (errResponse.isPresent()) {
|
||||||
|
String message = MessageFormat.format("Could not add document: {0}", errResponse.get());
|
||||||
|
if (logger.isErrorEnabled()) {
|
||||||
|
logger.error(message);
|
||||||
|
}
|
||||||
|
throw new RuntimeException(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String key(String id) {
|
||||||
|
return this.config.prefix + id;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Boolean> delete(List<String> idList) {
|
||||||
|
try (Pipeline pipeline = this.jedis.pipelined()) {
|
||||||
|
for (String id : idList) {
|
||||||
|
pipeline.jsonDel(key(id));
|
||||||
|
}
|
||||||
|
List<Object> responses = pipeline.syncAndReturnAll();
|
||||||
|
Optional<Object> errResponse = responses.stream().filter(Predicate.not(RESPONSE_DEL_OK)).findAny();
|
||||||
|
if (errResponse.isPresent()) {
|
||||||
|
if (logger.isErrorEnabled()) {
|
||||||
|
logger.error("Could not delete document: {}", errResponse.get());
|
||||||
|
}
|
||||||
|
return Optional.of(false);
|
||||||
|
}
|
||||||
|
return Optional.of(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Document> similaritySearch(SearchRequest request) {
|
||||||
|
|
||||||
|
Assert.isTrue(request.getTopK() > 0, "The number of documents to returned must be greater than zero");
|
||||||
|
Assert.isTrue(request.getSimilarityThreshold() >= 0 && request.getSimilarityThreshold() <= 1,
|
||||||
|
"The similarity score is bounded between 0 and 1; least to most similar respectively.");
|
||||||
|
|
||||||
|
String filter = nativeExpressionFilter(request);
|
||||||
|
|
||||||
|
String queryString = String.format(QUERY_FORMAT, filter, request.getTopK(), this.config.embeddingFieldName,
|
||||||
|
EMBEDDING_PARAM_NAME, DISTANCE_FIELD_NAME);
|
||||||
|
|
||||||
|
List<String> returnFields = new ArrayList<>();
|
||||||
|
this.config.metadataFields.stream().map(MetadataField::name).forEach(returnFields::add);
|
||||||
|
returnFields.add(this.config.embeddingFieldName);
|
||||||
|
returnFields.add(this.config.contentFieldName);
|
||||||
|
returnFields.add(DISTANCE_FIELD_NAME);
|
||||||
|
var embedding = toFloatArray(this.embeddingModel.embed(request.getQuery()));
|
||||||
|
Query query = new Query(queryString).addParam(EMBEDDING_PARAM_NAME, RediSearchUtil.toByteArray(embedding))
|
||||||
|
.returnFields(returnFields.toArray(new String[0]))
|
||||||
|
.setSortBy(DISTANCE_FIELD_NAME, true)
|
||||||
|
.dialect(2);
|
||||||
|
|
||||||
|
SearchResult result = this.jedis.ftSearch(this.config.indexName, query);
|
||||||
|
return result.getDocuments()
|
||||||
|
.stream()
|
||||||
|
.filter(d -> similarityScore(d) >= request.getSimilarityThreshold())
|
||||||
|
.map(this::toDocument)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document toDocument(redis.clients.jedis.search.Document doc) {
|
||||||
|
var id = doc.getId().substring(this.config.prefix.length());
|
||||||
|
var content = doc.hasProperty(this.config.contentFieldName) ? doc.getString(this.config.contentFieldName)
|
||||||
|
: null;
|
||||||
|
Map<String, Object> metadata = this.config.metadataFields.stream()
|
||||||
|
.map(MetadataField::name)
|
||||||
|
.filter(doc::hasProperty)
|
||||||
|
.collect(Collectors.toMap(Function.identity(), doc::getString));
|
||||||
|
metadata.put(DISTANCE_FIELD_NAME, 1 - similarityScore(doc));
|
||||||
|
return new Document(id, content, metadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
private float similarityScore(redis.clients.jedis.search.Document doc) {
|
||||||
|
return (2 - Float.parseFloat(doc.getString(DISTANCE_FIELD_NAME))) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String nativeExpressionFilter(SearchRequest request) {
|
||||||
|
if (request.getFilterExpression() == null) {
|
||||||
|
return "*";
|
||||||
|
}
|
||||||
|
return "(" + this.filterExpressionConverter.convertExpression(request.getFilterExpression()) + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void afterPropertiesSet() {
|
||||||
|
|
||||||
|
if (!this.initializeSchema) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If index already exists don't do anything
|
||||||
|
if (this.jedis.ftList().contains(this.config.indexName)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String response = this.jedis.ftCreate(this.config.indexName,
|
||||||
|
FTCreateParams.createParams().on(IndexDataType.JSON).addPrefix(this.config.prefix), schemaFields());
|
||||||
|
if (!RESPONSE_OK.test(response)) {
|
||||||
|
String message = MessageFormat.format("Could not create index: {0}", response);
|
||||||
|
throw new RuntimeException(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Iterable<SchemaField> schemaFields() {
|
||||||
|
Map<String, Object> vectorAttrs = new HashMap<>();
|
||||||
|
vectorAttrs.put("DIM", this.embeddingModel.dimensions());
|
||||||
|
vectorAttrs.put("DISTANCE_METRIC", DEFAULT_DISTANCE_METRIC);
|
||||||
|
vectorAttrs.put("TYPE", VECTOR_TYPE_FLOAT32);
|
||||||
|
List<SchemaField> fields = new ArrayList<>();
|
||||||
|
fields.add(TextField.of(jsonPath(this.config.contentFieldName)).as(this.config.contentFieldName).weight(1.0));
|
||||||
|
fields.add(VectorField.builder()
|
||||||
|
.fieldName(jsonPath(this.config.embeddingFieldName))
|
||||||
|
.algorithm(vectorAlgorithm())
|
||||||
|
.attributes(vectorAttrs)
|
||||||
|
.as(this.config.embeddingFieldName)
|
||||||
|
.build());
|
||||||
|
|
||||||
|
if (!CollectionUtils.isEmpty(this.config.metadataFields)) {
|
||||||
|
for (MetadataField field : this.config.metadataFields) {
|
||||||
|
fields.add(schemaField(field));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SchemaField schemaField(MetadataField field) {
|
||||||
|
String fieldName = jsonPath(field.name);
|
||||||
|
switch (field.fieldType) {
|
||||||
|
case NUMERIC:
|
||||||
|
return NumericField.of(fieldName).as(field.name);
|
||||||
|
case TAG:
|
||||||
|
return TagField.of(fieldName).as(field.name);
|
||||||
|
case TEXT:
|
||||||
|
return TextField.of(fieldName).as(field.name);
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
MessageFormat.format("Field {0} has unsupported type {1}", field.name, field.fieldType));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private VectorAlgorithm vectorAlgorithm() {
|
||||||
|
if (config.vectorAlgorithm == Algorithm.HSNW) {
|
||||||
|
return VectorAlgorithm.HNSW;
|
||||||
|
}
|
||||||
|
return VectorAlgorithm.FLAT;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String jsonPath(String field) {
|
||||||
|
return JSON_PATH_PREFIX + field;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static float[] toFloatArray(List<Double> embeddingDouble) {
|
||||||
|
float[] embeddingFloat = new float[embeddingDouble.size()];
|
||||||
|
int i = 0;
|
||||||
|
for (Double d : embeddingDouble) {
|
||||||
|
embeddingFloat[i++] = d.floatValue();
|
||||||
|
}
|
||||||
|
return embeddingFloat;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Binary file not shown.
@ -153,6 +153,10 @@ spring:
|
|||||||
|
|
||||||
spring:
|
spring:
|
||||||
ai:
|
ai:
|
||||||
|
vectorstore:
|
||||||
|
redis:
|
||||||
|
index: default-index
|
||||||
|
prefix: "default:"
|
||||||
qianfan: # 文心一言
|
qianfan: # 文心一言
|
||||||
api-key: x0cuLZ7XsaTCU08vuJWO87Lg
|
api-key: x0cuLZ7XsaTCU08vuJWO87Lg
|
||||||
secret-key: R9mYF9dl9KASgi5RUq0FQt3wRisSnOcK
|
secret-key: R9mYF9dl9KASgi5RUq0FQt3wRisSnOcK
|
||||||
|
Loading…
Reference in New Issue
Block a user