from redisvl.query import MultiVectorQuery, Vector
from redisvl.index import SearchIndex
from redisvl.schema import IndexSchema
import numpy as np
from redis import Redis
# Connection to a local Redis server (decode_responses=True).
client = Redis(host="localhost", port=6379, decode_responses=True)

# Both vector fields use the same 512-dim / cosine / HNSW / float32 settings,
# so they are generated from a single template keyed by field name.
_vector_fields = [
    {
        "name": field_name,
        "type": "vector",
        "attrs": {
            "dims": 512,
            "distance_metric": "cosine",
            "algorithm": "hnsw",
            "datatype": "float32",
        },
    }
    for field_name in ("embedding_1", "embedding_2")
]

# JSON-backed index "idx:test" over keys prefixed with "test:", holding a
# numeric id plus the two vector fields.
_schema = IndexSchema.from_dict(
    {
        "index": {
            "name": "idx:test",
            "prefix": "test:",
            "storage_type": "json",
        },
        "fields": [{"name": "id", "type": "numeric"}, *_vector_fields],
    },
)

test_index = SearchIndex(_schema, client, validate_on_load=True)

# overwrite=True so that re-running the script starts from a fresh index
# definition instead of failing on an existing one.
test_index.create(overwrite=True)
# Number of documents to load: one more than 10,000, the size above which the
# reported aggregation error appears.
TEST_DOCS_COUNT = 10001

# One document per id, each with two independent random 512-dim vectors.
# Iterating range() directly replaces the redundant enumerate(range(...)),
# whose second element was always discarded.
test_data = [
    {
        "id": i,
        "embedding_1": np.random.rand(512),
        "embedding_2": np.random.rand(512),
    }
    for i in range(TEST_DOCS_COUNT)
]

# Write all generated documents into the index.
test_index.load(test_data)
# Random query vectors, one per indexed vector field.
embedding_1 = np.random.rand(512)
embedding_2 = np.random.rand(512)

# (query vector, target field, weight) for each vector in the query.
_weighted_inputs = (
    (embedding_1, "embedding_1", 0.7),
    (embedding_2, "embedding_2", 0.2),
)
query_vectors = [
    Vector(vector=vec, field_name=field, dtype="float32", weight=weight)
    for vec, field, weight in _weighted_inputs
]

# Ask for the 10 best matches across both vector fields, returning only "id".
query = MultiVectorQuery(
    vectors=query_vectors,
    num_results=10,
    return_fields=["id"],
)

results = test_index.query(query)
print(results)
With TEST_DOCS_COUNT <= 10,000 the query works fine. I assume this is to do with the same logic that sets the max offset to 10,000 by default.
Is RediSearch limited by some design constraint to this number? Seems low for many applications of vector search, or maybe I'm missing something.
To reproduce:
With TEST_DOCS_COUNT > 10,000, Redis throws:
Error while aggregating: Could not find the value for a parameter name, consider using EXISTS if applicable for distance_1
With TEST_DOCS_COUNT <= 10,000 it works fine. I assume this is to do with the same logic that sets the max offset to 10,000 by default.
The workaround I'm using for now is the unstable feature case() to assign defaults for missing scores.
Is RediSearch limited by some design constraint to this number? Seems low for many applications of vector search, or maybe I'm missing something.