From 4931d58814ea2d3e85b2cd48fa0e8e7bb8aea01d Mon Sep 17 00:00:00 2001
From: huangliang
Date: Thu, 16 Oct 2025 10:08:47 +0000
Subject: [PATCH] feat: add AlayaLite

---
 .github/workflows/benchmarks.yml              |  1 +
 README.md                                     |  1 +
 .../algorithms/alayalite/Dockerfile           |  6 ++
 .../algorithms/alayalite/config.yml           | 20 +++++
 ann_benchmarks/algorithms/alayalite/module.py | 85 +++++++++++++++++++
 5 files changed, 113 insertions(+)
 create mode 100644 ann_benchmarks/algorithms/alayalite/Dockerfile
 create mode 100644 ann_benchmarks/algorithms/alayalite/config.yml
 create mode 100644 ann_benchmarks/algorithms/alayalite/module.py

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 1d1585d6c..4515850c5 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -29,6 +29,7 @@ jobs:
       matrix:
         dataset: [random-xs-20-angular]
         library:
+          - alayalite
           - annoy
           - balltree
           - bruteforce
diff --git a/README.md b/README.md
index d02dc8f07..8aae4d319 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ Evaluated
 * [kgn](https://github.com/Henry-yan/kgn)
 * [vsag](https://github.com/antgroup/vsag)
 * [PGVectorScale](https://github.com/timescale/pgvectorscale/tree/main)
+* [AlayaLite](https://github.com/AlayaDB-AI/AlayaLite.git) ![https://img.shields.io/github/stars/AlayaDB-AI/AlayaLite?style=social](https://img.shields.io/github/stars/AlayaDB-AI/AlayaLite?style=social)
 
 Data sets
 =========
diff --git a/ann_benchmarks/algorithms/alayalite/Dockerfile b/ann_benchmarks/algorithms/alayalite/Dockerfile
new file mode 100644
index 000000000..2d06cf44e
--- /dev/null
+++ b/ann_benchmarks/algorithms/alayalite/Dockerfile
@@ -0,0 +1,6 @@
+FROM ann-benchmarks
+
+RUN apt-get update && apt-get install -y wget
+RUN wget https://github.com/AlayaDB-AI/AlayaLite/releases/download/v0.1.0a2/alayalite-0.1.0a2-cp310-cp310-linux_x86_64.whl -O /home/app/alayalite-0.1.0a2-cp310-cp310-linux_x86_64.whl
+RUN python3 -m pip install /home/app/alayalite-0.1.0a2-cp310-cp310-linux_x86_64.whl
+RUN python3 -c 'import alayalite'
\ No newline at end of file
diff --git a/ann_benchmarks/algorithms/alayalite/config.yml b/ann_benchmarks/algorithms/alayalite/config.yml
new file mode 100644
index 000000000..c62d0f181
--- /dev/null
+++ b/ann_benchmarks/algorithms/alayalite/config.yml
@@ -0,0 +1,20 @@
+float:
+  any:
+  - base_args: ['@metric', '@dimension']
+    constructor: AlayaLite
+    disabled: false
+    docker_tag: ann-benchmarks-alayalite
+    module: ann_benchmarks.algorithms.alayalite
+    name: AlayaLite
+    run_groups:
+      alayalite:
+        #### index_type HNSW FLAT
+        args:
+          M: 32
+          R: [32]
+          L: 200
+          index_type: NONE
+          quantization_type: RABITQ
+          fit_threads: 1 # only batch search
+          search_threads: 1 # only batch search
+        query_args: [[10, 20, 40, 60, 80, 120, 200, 400, 600, 800]]
\ No newline at end of file
diff --git a/ann_benchmarks/algorithms/alayalite/module.py b/ann_benchmarks/algorithms/alayalite/module.py
new file mode 100644
index 000000000..d6bbca62d
--- /dev/null
+++ b/ann_benchmarks/algorithms/alayalite/module.py
@@ -0,0 +1,85 @@
+import os
+from dataclasses import dataclass, field
+import numpy as np
+from alayalite import Client
+from alayalite import Index
+from ..base.module import BaseANN
+
+
+class AlayaLite(BaseANN):
+    """ann-benchmarks adapter for an AlayaLite index."""
+
+    def __init__(self, metric, dim, method_param):
+        """Read the build settings from ``method_param`` (config.yml ``args``)."""
+        self.index_save_dir = 'alaya_indices'
+        self.client = Client(self.index_save_dir)
+        self.index = None
+        self.ef = None
+        # Query state, set by prepare_query()/run_prepared_query()/batch_query().
+        self.q = None
+        self.n = None
+        self.res = None
+        self.dim = dim
+        self.metric = metric
+
+        self.index_type = method_param['index_type']
+        self.quantization_type = method_param['quantization_type']
+        self.fit_threads = method_param['fit_threads']
+        self.search_threads = method_param['search_threads']
+        self.R = method_param['R']
+        self.L = method_param['L']
+        self.M = method_param['M']
+
+        self.save_index_name = f'alayalite_index_it_{self.index_type}_qt_{self.quantization_type}_dim_{self.dim}_metric_{self.metric}_M{self.M}.idx'
+        print("alaya init done")
+
+    def fit(self, X: np.array) -> None:
+        """Build the index over ``X``, or load it if an earlier run cached it."""
+        # The cache key must identify the dataset, not only the parameters:
+        # without the vector count, a second dataset with the same dimension
+        # and metric would silently reuse the first dataset's index.
+        index_name = f'n_{X.shape[0]}_{self.save_index_name}'
+
+        if os.path.exists(os.path.join(self.index_save_dir, index_name)):
+            self.index = Index.load(self.index_save_dir, index_name)
+            print('load index from cache')
+        else:
+            X = X.astype(np.float32)
+            self.index = self.client.create_index(name=index_name, metric=self.metric, quantization_type=self.quantization_type, capacity=X.shape[0])
+            self.index.fit(vectors=X, num_threads=self.fit_threads)
+            self.client.save_index(index_name)
+            print('save index to cache')
+
+    def set_query_arguments(self, ef):
+        """Set the ``ef_search`` value passed to subsequent searches."""
+        self.ef = int(ef)
+
+    def prepare_query(self, q: np.array, n: int):
+        """Store the query ``q`` and result count ``n`` for run_prepared_query()."""
+        self.q = q
+        self.n = n
+
+    def run_prepared_query(self):
+        """Run the query stored by prepare_query() and keep its result."""
+        self.res = self.index.search(query=self.q, topk=self.n, ef_search=self.ef)
+
+    def batch_query(self, X: np.array, n: int) -> None:
+        """Search the query batch ``X`` in one call and keep the result."""
+        self.res = self.index.batch_search(queries=X, topk=n, ef_search=self.ef)
+
+    def get_prepared_query_results(self):
+        """Return the result kept by the last run_prepared_query()."""
+        return self.res
+
+    def get_batch_results(self) -> np.array:
+        """Return the result kept by the last batch_query()."""
+        return self.res
+
+    def __str__(self) -> str:
+        """Name the run by its parameters so result labels are distinguishable."""
+        return (
+            f"AlayaLite(index_type={self.index_type}, "
+            f"quantization_type={self.quantization_type}, "
+            f"M={self.M}, R={self.R}, L={self.L}, ef={self.ef})"
+        )
+