Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .github/workflows/03-macos-linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ jobs:
sudo apt-get install -y clang libomp-dev
shell: bash

# Installs the libaio development package (libaio-dev) via apt. The DiskAnn
# support block in the top-level CMakeLists.txt is documented as requiring
# libaio. The step is restricted to Linux runners with X64 architecture.
- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
shell: bash

- name: Print CPU info
if: runner.os == 'Linux'
run: lscpu
Expand Down Expand Up @@ -89,7 +97,6 @@ jobs:
pytest \
scikit-build-core \
setuptools_scm
shell: bash

- name: Build from source
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/clang_tidy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ jobs:
sudo apt-get update
sudo apt-get install -y clang-tidy=1:18.0-59~exp2 cmake ninja-build libomp-dev

# Installs the libaio development package (libaio-dev) via apt before CMake is
# configured. The DiskAnn support block in the top-level CMakeLists.txt is
# documented as requiring libaio. Only runs on Linux runners with X64 architecture.
- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
shell: bash

- name: Configure CMake and export compile commands
run: |
cmake -S . -B build -G Ninja \
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@
[submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
url = https://github.com/VectorDB-NTU/RaBitQ-Library.git
[submodule "thirdparty/aio/libaio-0.3"]
path = thirdparty/aio/libaio-0.3
url = https://github.com/yugabyte/libaio.git
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ else()
endif()
message(STATUS "RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}")

# DiskAnn support: enabled only for Linux on x86 processors (x86_64, i686 or
# i386), and never for Android or iOS builds. DiskAnn requires libaio.
#
# The result is published two ways:
#   * CMake variable DISKANN_SUPPORTED (ON/OFF) for build logic in this
#     directory and the subdirectories added after this point;
#   * preprocessor macro DISKANN_SUPPORTED (1/0) for sources compiled in this
#     directory and below.
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux"
   AND "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "x86_64|i686|i386"
   AND NOT ANDROID
   AND NOT IOS)
  set(DISKANN_SUPPORTED ON)
  # add_compile_definitions takes the bare NAME=value form (no -D prefix).
  add_compile_definitions(DISKANN_SUPPORTED=1)
else()
  set(DISKANN_SUPPORTED OFF)
  add_compile_definitions(DISKANN_SUPPORTED=0)
  message(STATUS "DiskAnn support disabled - only supported on Linux x86 (x86_64/i686/i386)")
endif()
message(STATUS "DISKANN_SUPPORTED: ${DISKANN_SUPPORTED}")

option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" OFF)
if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
set(USE_OSS_MIRROR "$ENV{USE_OSS_MIRROR}" CACHE BOOL "Use OSS mirror for faster third-party downloads" FORCE)
Expand Down
14 changes: 14 additions & 0 deletions python/tests/detail/fixture_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import pytest
import logging
import platform

# Machine identifiers (as reported by platform.machine()) on which the DiskAnn
# tests are allowed to run.
_DISKANN_MACHINES = ("x86_64", "AMD64", "i686", "i386")

# True only when the host reports Linux as its system and one of the machine
# identifiers above; tests use this flag to skip DiskAnn cases elsewhere.
DISKANN_SUPPORTED = (
    platform.system() == "Linux" and platform.machine() in _DISKANN_MACHINES
)

from typing import Any, Generator
from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
Expand Down Expand Up @@ -97,6 +105,12 @@ def full_schema_new(request) -> CollectionSchema:
else:
nullable, has_index, vector_index = True, False, HnswIndexParam()

# Skip DiskAnn tests on unsupported platforms
from zvec.model.param import DiskAnnIndexParam

if isinstance(vector_index, DiskAnnIndexParam) and not DISKANN_SUPPORTED:
pytest.skip("DiskAnn only supported on Linux x86_64")

scalar_index_param = None
vector_index_param = None
if has_index:
Expand Down
96 changes: 80 additions & 16 deletions python/tests/detail/test_collection_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
HnswIndexParam,
FlatIndexParam,
IVFIndexParam,
DiskAnnIndexParam,
HnswQueryParam,
IVFQueryParam,
DiskAnnQueryParam,
)

from zvec.model.schema import FieldSchema, VectorSchema
Expand Down Expand Up @@ -179,10 +181,24 @@ def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type,
for field_name, query_vectors in query_vectors_map.items():
ground_truth_map[field_name] = {}

# Support per-field metric type: metric_type can be a dict mapping
# field_name -> MetricType, or a single MetricType applied to all fields.
if isinstance(metric_type, dict):
field_metric = metric_type.get(field_name, MetricType.IP)
else:
field_metric = metric_type

for i, query_vector in enumerate(query_vectors):
# Get the ground truth for this query
relevant_doc_ids_scores = get_ground_truth_for_vector_query(
collection, query_vector, field_name, test_docs, i, metric_type, k, True
collection,
query_vector,
field_name,
test_docs,
i,
field_metric,
k,
True,
)
ground_truth_map[field_name][i] = relevant_doc_ids_scores

Expand Down Expand Up @@ -292,6 +308,7 @@ class TestRecall:
[
(True, True, HnswIndexParam()),
(False, True, IVFIndexParam()),
(False, True, DiskAnnIndexParam()),
(False, True, FlatIndexParam()), # ——ok
(
True,
Expand Down Expand Up @@ -371,6 +388,19 @@ class TestRecall:
use_soar=False,
),
),
(
True,
True,
DiskAnnIndexParam(
metric_type=MetricType.IP,
max_degree=32,
),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -388,10 +418,16 @@ def test_recall_with_single_vector_valid_500(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -438,9 +474,13 @@ def test_recall_with_single_vector_valid_500(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -479,8 +519,8 @@ def test_recall_with_single_vector_valid_500(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down Expand Up @@ -552,7 +592,21 @@ def test_recall_with_single_vector_valid_500(
use_soar=True,
),
),
# (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.IP, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.COSINE, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -571,10 +625,16 @@ def test_recall_with_single_vector_valid_2000(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -621,9 +681,13 @@ def test_recall_with_single_vector_valid_2000(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -662,8 +726,8 @@ def test_recall_with_single_vector_valid_2000(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down
4 changes: 4 additions & 0 deletions python/zvec/model/param/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
AddColumnOption,
AlterColumnOption,
CollectionOption,
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
HnswIndexParam,
HnswQueryParam,
Expand All @@ -33,6 +35,8 @@
"AddColumnOption",
"AlterColumnOption",
"CollectionOption",
"DiskAnnIndexParam",
"DiskAnnQueryParam",
"FlatIndexParam",
"HnswIndexParam",
"HnswQueryParam",
Expand Down
3 changes: 2 additions & 1 deletion src/ailego/algorithm/kmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class Kmc2CentroidsGenerator {
auto *centroids = owner->mutable_centroids();

std::mt19937 mt((std::random_device())());

std::uniform_real_distribution<float> dist(0.0, 1.0);

ContainerType benches(cache.dimension());
Expand Down Expand Up @@ -1216,4 +1217,4 @@ using NibbleInnerProductKmeans =
LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;

} // namespace ailego
} // namespace zvec
} // namespace zvec
2 changes: 1 addition & 1 deletion src/ailego/algorithm/lloyd_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,4 +361,4 @@ class LloydCluster {
};

} // namespace ailego
} // namespace zvec
} // namespace zvec
5 changes: 5 additions & 0 deletions src/binding/c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ set_target_properties(zvec_c_api PROPERTIES
VISIBILITY_INLINES_HIDDEN ON
)

# c_api.h selects __declspec(dllexport) when ZVEC_BUILD_SHARED is defined, so
# the macro is added to zvec_c_api's own compilation whenever MSVC or WIN32 is
# set. The generator expression yields an empty string (no definition) when
# neither variable is truthy.
target_compile_definitions(zvec_c_api PRIVATE
  $<$<OR:$<BOOL:${MSVC}>,$<BOOL:${WIN32}>>:ZVEC_BUILD_SHARED>
)

find_package(Threads REQUIRED)

# Static linking of C++ standard library is handled in platform-specific sections
Expand Down
62 changes: 43 additions & 19 deletions src/binding/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,49 @@ set(SRC_LISTS
pybind11_add_module(_zvec ${SRC_LISTS})

if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
target_link_libraries(_zvec PRIVATE
-Wl,--whole-archive
$<TARGET_FILE:core_knn_flat_static>
$<TARGET_FILE:core_knn_flat_sparse_static>
$<TARGET_FILE:core_knn_hnsw_static>
$<TARGET_FILE:core_knn_hnsw_rabitq_static>
$<TARGET_FILE:core_knn_hnsw_sparse_static>
$<TARGET_FILE:core_knn_ivf_static>
$<TARGET_FILE:core_knn_cluster_static>
$<TARGET_FILE:core_mix_reducer_static>
$<TARGET_FILE:core_metric_static>
$<TARGET_FILE:core_utility_static>
$<TARGET_FILE:core_quantizer_static>
-Wl,--no-whole-archive
zvec_db
)
target_link_options(_zvec PRIVATE
"LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
# Link the Linux build of the _zvec Python module.
#
# Every core_* static archive is wrapped in --whole-archive/--no-whole-archive
# so the linker keeps all of its objects. The DiskAnn archive and libaio are
# added only when DISKANN_SUPPORTED is ON. That flag is expected to come from
# the top-level CMakeLists.txt (NOTE(review): confirm it is set before this
# directory is added). Using it, rather than a separate ARM-only processor
# check, keeps DiskAnn off every architecture the top-level file disables it
# for, not just ARM.
set(zvec_diskann_archive "")
set(zvec_diskann_system_libs "")
if (DISKANN_SUPPORTED)
  set(zvec_diskann_archive $<TARGET_FILE:core_knn_diskann_static>)
  set(zvec_diskann_system_libs aio)
endif()

# The two variables above expand to nothing when DiskAnn is disabled.
target_link_libraries(_zvec PRIVATE
  -Wl,--whole-archive
  $<TARGET_FILE:core_knn_flat_static>
  $<TARGET_FILE:core_knn_flat_sparse_static>
  $<TARGET_FILE:core_knn_hnsw_static>
  $<TARGET_FILE:core_knn_hnsw_rabitq_static>
  $<TARGET_FILE:core_knn_hnsw_sparse_static>
  $<TARGET_FILE:core_knn_ivf_static>
  ${zvec_diskann_archive}
  $<TARGET_FILE:core_knn_cluster_static>
  $<TARGET_FILE:core_mix_reducer_static>
  $<TARGET_FILE:core_metric_static>
  $<TARGET_FILE:core_utility_static>
  $<TARGET_FILE:core_quantizer_static>
  -Wl,--no-whole-archive
  zvec_db
  ${zvec_diskann_system_libs}
)

# Apply the linker version script exports.map from this directory.
target_link_options(_zvec PRIVATE
  "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
elseif (APPLE)
target_link_libraries(_zvec PRIVATE
-Wl,-force_load,$<TARGET_FILE:core_knn_flat_static>
Expand Down
Loading
Loading