From d03df4459636b4a9d5f631b3d7b9d22ccad6beb0 Mon Sep 17 00:00:00 2001 From: LTLA Date: Mon, 15 Dec 2025 01:45:45 +1100 Subject: [PATCH 1/5] Minor fixes for compilation warnings in headers and bindings. Mostly involves unused variables and signed/unsigned comparisons. --- hnswlib/bruteforce.h | 3 +- hnswlib/hnswalg.h | 6 ++- hnswlib/hnswlib.h | 9 ++++- python_bindings/bindings.cpp | 73 ++++++++++++++++++++---------------- setup.py | 2 +- 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/hnswlib/bruteforce.h b/hnswlib/bruteforce.h index cff0a67d..16c266a3 100644 --- a/hnswlib/bruteforce.h +++ b/hnswlib/bruteforce.h @@ -65,6 +65,7 @@ class BruteforceSearch : public AlgorithmInterface { Status addPointNoExceptions(const void *datapoint, labeltype label, bool replace_deleted = false) override { + (void)replace_deleted; // silence unused variable warning. int idx; { std::unique_lock lock(index_lock); @@ -113,7 +114,7 @@ class BruteforceSearch : public AlgorithmInterface { assert(k <= cur_element_count); std::priority_queue> topResults; dist_t lastdist = std::numeric_limits::max(); - for (int i = 0; i < cur_element_count; i++) { + for (size_t i = 0; i < cur_element_count; i++) { dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); if (dist <= lastdist || topResults.size() < k) { labeltype label = *((labeltype *) (data_ + size_per_element_ * i + data_size_)); diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h index a04b2ed4..0f470152 100644 --- a/hnswlib/hnswalg.h +++ b/hnswlib/hnswalg.h @@ -87,6 +87,7 @@ class HierarchicalNSW : public AlgorithmInterface { size_t max_elements = 0, bool allow_replace_deleted = false) : allow_replace_deleted_(allow_replace_deleted) { + (void)nmslib; // silence unused variable warnings. loadIndex(location, s, max_elements); } @@ -543,6 +544,7 @@ class HierarchicalNSW : public AlgorithmInterface { std::priority_queue, std::vector>, CompareByFirst> &top_candidates, int level, bool isUpdate) { + (void)data_point; // silence unused variable warning. size_t Mcurmax = level ? maxM_ : maxM0_; getNeighborsByHeuristic2(top_candidates, M_); if (top_candidates.size() > M_) @@ -1291,7 +1293,7 @@ class HierarchicalNSW : public AlgorithmInterface { tableint *datal = (tableint *) (data + 1); for (int i = 0; i < size; i++) { tableint cand = datal[i]; - if (cand < 0 || cand > max_elements_) + if (cand > max_elements_) return Status("cand error"); dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { @@ -1365,7 +1367,7 @@ class HierarchicalNSW : public AlgorithmInterface { tableint *datal = (tableint *) (data + 1); for (int i = 0; i < size; i++) { tableint cand = datal[i]; - if (cand < 0 || cand > max_elements_) + if (cand > max_elements_) return Status("cand error"); dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h index 570e876b..f99644d4 100644 --- a/hnswlib/hnswlib.h +++ b/hnswlib/hnswlib.h @@ -92,6 +92,7 @@ static bool AVXCapable() { return HW_AVX && avxSupported; } +#if defined(USE_AVX512) static bool AVX512Capable() { if (!AVXCapable()) return false; @@ -122,6 +123,8 @@ static bool AVX512Capable() { } #endif +#endif + #include #include #include @@ -212,7 +215,11 @@ typedef size_t labeltype; // This can be extended to store state for filtering (e.g. from a std::set) class BaseFilterFunctor { public: - virtual bool operator()(hnswlib::labeltype id) { return true; } + virtual bool operator()(hnswlib::labeltype id) { + (void)id; // silence unused variable warning. + return true; + } + virtual ~BaseFilterFunctor() {}; }; diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp index babf9741..0bb6568c 100644 --- a/python_bindings/bindings.cpp +++ b/python_bindings/bindings.cpp @@ -4,6 +4,7 @@ #include #include #include "hnswlib.h" +#include #include #include #include @@ -100,8 +101,8 @@ inline void get_input_array_shapes(const py::buffer_info& buffer, size_t* rows, if (buffer.ndim != 2 && buffer.ndim != 1) { char msg[256]; snprintf(msg, sizeof(msg), - "Input vector data wrong shape. Number of dimensions %d. Data must be a 1D or 2D array.", - buffer.ndim); + "Input vector data wrong shape. Number of dimensions %zd. Data must be a 1D or 2D array.", + (ssize_t)buffer.ndim); HNSWLIB_THROW_RUNTIME_ERROR(msg); } if (buffer.ndim == 2) { @@ -113,6 +114,16 @@ inline void get_input_array_shapes(const py::buffer_info& buffer, size_t* rows, } } +// Quick and dirty implementations of C++20's std::cmp_equal() and friends. +template +bool safe_unsigned_eq(Left_ l, Right_ r) { + return static_cast::type>(l) == static_cast::type>(r); +} + +template +bool safe_unsigned_lte(Left_ l, Right_ r) { + return static_cast::type>(l) <= static_cast::type>(r); +} inline std::vector get_input_ids_and_check_shapes(const py::object& ids_, size_t feature_rows) { std::vector ids; @@ -120,12 +131,12 @@ inline std::vector get_input_ids_and_check_shapes(const py::object& ids_ py::array_t < size_t, py::array::c_style | py::array::forcecast > items(ids_); auto ids_numpy = items.request(); // check shapes - if (!((ids_numpy.ndim == 1 && ids_numpy.shape[0] == feature_rows) || + if (!((ids_numpy.ndim == 1 && safe_unsigned_eq(ids_numpy.shape[0], feature_rows)) || (ids_numpy.ndim == 0 && feature_rows == 1))) { char msg[256]; snprintf(msg, sizeof(msg), - "The input label shape %d does not match the input data vector shape %d", - ids_numpy.ndim, feature_rows); + "The input label shape %zd does not match the input data vector shape %zu", + (ssize_t)ids_numpy.ndim, feature_rows); HNSWLIB_THROW_RUNTIME_ERROR(msg); } // extract data @@ -259,11 +270,11 @@ class Index { size_t rows, features; get_input_array_shapes(buffer, &rows, &features); - if (features != dim) + if (!safe_unsigned_eq(features, dim)) HNSWLIB_THROW_RUNTIME_ERROR("Wrong dimensionality of the vectors"); // avoid using threads when the number of additions is small: - if (rows <= num_threads * 4) { + if (safe_unsigned_lte(rows, num_threads * 4)) { num_threads = 1; } @@ -287,6 +298,7 @@ class Index { py::gil_scoped_release l; if (normalize == false) { ParallelFor(start, rows, num_threads, [&](size_t row, size_t threadId) { + (void)threadId; // silence unused variable warnings. size_t id = ids.size() ? ids.at(row) : (cur_l + row); appr_alg->addPoint((void*)items.data(row), (size_t)id, replace_deleted); }); @@ -334,9 +346,9 @@ class Index { if (return_type == "list") { return py::cast(data); } - if (return_type == "numpy") { - return py::array_t< data_t, py::array::c_style | py::array::forcecast >(py::cast(data)); - } + + // Must be numpy if it's not a list. + return py::array_t< data_t, py::array::c_style | py::array::forcecast >(py::cast(data)); } @@ -394,19 +406,19 @@ class Index { } py::capsule free_when_done_l0(data_level0_npy, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); py::capsule free_when_done_lvl(element_levels_npy, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); py::capsule free_when_done_lb(label_lookup_key_npy, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); py::capsule free_when_done_id(label_lookup_val_npy, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); py::capsule free_when_done_ll(link_list_npy, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); /* TODO: serialize state of random generators appr_alg->level_generator_ and appr_alg->update_probability_generator_ */ @@ -557,11 +569,10 @@ class Index { auto link_list_npy = d["link_lists"].cast >(); for (size_t i = 0; i < appr_alg->cur_element_count; i++) { - if (label_lookup_val_npy.data()[i] < 0) { - HNSWLIB_THROW_RUNTIME_ERROR("Internal id cannot be negative!"); - } else { - appr_alg->label_lookup_.insert(std::make_pair(label_lookup_key_npy.data()[i], label_lookup_val_npy.data()[i])); - } +// if (label_lookup_val_npy.data()[i] < 0) { // unnecessary as tableint is unsigned. +// HNSWLIB_THROW_RUNTIME_ERROR("Internal id cannot be negative!"); +// } + appr_alg->label_lookup_.insert(std::make_pair(label_lookup_key_npy.data()[i], label_lookup_val_npy.data()[i])); } memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes()); @@ -630,7 +641,7 @@ class Index { get_input_array_shapes(buffer, &rows, &features); // avoid using threads when the number of searches is small: - if (rows <= num_threads * 4) { + if (safe_unsigned_lte(rows, num_threads * 4)) { num_threads = 1; } @@ -643,6 +654,7 @@ class Index { if (normalize == false) { ParallelFor(0, rows, num_threads, [&](size_t row, size_t threadId) { + (void)threadId; // silence unused variable warnings. std::priority_queue> result = appr_alg->searchKnn( (void*)items.data(row), k, p_idFilter); if (result.size() != k) @@ -658,8 +670,6 @@ class Index { } else { std::vector norm_array(num_threads * features); ParallelFor(0, rows, num_threads, [&](size_t row, size_t threadId) { - float* data = (float*)items.data(row); - size_t start_idx = threadId * dim; normalize_vector((float*)items.data(row), (norm_array.data() + start_idx)); @@ -678,10 +688,10 @@ class Index { } } py::capsule free_when_done_l(data_numpy_l, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); py::capsule free_when_done_d(data_numpy_d, [](void* f) { - delete[] f; + delete[] reinterpret_cast(f); }); return py::make_tuple( @@ -807,7 +817,7 @@ class BFIndex { size_t rows, features; get_input_array_shapes(buffer, &rows, &features); - if (features != dim) + if (!safe_unsigned_eq(features, dim)) HNSWLIB_THROW_RUNTIME_ERROR("Wrong dimensionality of the vectors"); std::vector ids = get_input_ids_and_check_shapes(ids_, rows); @@ -839,6 +849,7 @@ class BFIndex { void loadIndex(const std::string &path_to_index, size_t max_elements) { + (void)max_elements; // silence unused variable warnings. if (alg) { std::cerr << "Warning: Calling load_index for an already inited index. Old index is being deallocated." << std::endl; delete alg; @@ -875,6 +886,7 @@ class BFIndex { if (!normalize) { ParallelFor(0, rows, num_threads, [&](size_t row, size_t threadId) { + (void)threadId; // silence unused variable warnings. std::priority_queue> result = alg->searchKnn( (void*)items.data(row), k, p_idFilter); if (result.size() != k) @@ -909,10 +921,10 @@ class BFIndex { } py::capsule free_when_done_l(data_numpy_l, [](void *f) { - delete[] f; + delete[] reinterpret_cast(f); }); py::capsule free_when_done_d(data_numpy_d, [](void *f) { - delete[] f; + delete[] reinterpret_cast(f); }); @@ -932,9 +944,7 @@ class BFIndex { }; -PYBIND11_PLUGIN(hnswlib) { - py::module m("hnswlib"); - +PYBIND11_MODULE(hnswlib, m) { py::class_>(m, "Index") .def(py::init(&Index::createFromParams), py::arg("params")) /* WARNING: Index::createFromIndex is not thread-safe with Index::addItems */ @@ -1034,5 +1044,4 @@ PYBIND11_PLUGIN(hnswlib) { .def("get_max_elements", &BFIndex::getMaxElements) .def("get_current_count", &BFIndex::getCurrentCount) .def_readwrite("num_threads", &BFIndex::num_threads_default); - return m.ptr(); } diff --git a/setup.py b/setup.py index 0900adc6..e76a2e31 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ class BuildExt(build_ext): compiler_flag_native = '-march=native' c_opts = { 'msvc': ['/EHsc', '/openmp', '/O2'], - 'unix': ['-O3', compiler_flag_native], # , '-w' + 'unix': ['-O3', compiler_flag_native, '-Wall', '-Wextra', '-Wpedantic'] # , '-w' } link_opts = { 'unix': [], From 5b2fbcfefc26d07931d784bb30a77d39f093ab7d Mon Sep 17 00:00:00 2001 From: LTLA Date: Mon, 15 Dec 2025 01:51:32 +1100 Subject: [PATCH 2/5] ssize_t is not supported on Windows. --- python_bindings/bindings.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp index 0bb6568c..6f48707e 100644 --- a/python_bindings/bindings.cpp +++ b/python_bindings/bindings.cpp @@ -101,8 +101,8 @@ inline void get_input_array_shapes(const py::buffer_info& buffer, size_t* rows, if (buffer.ndim != 2 && buffer.ndim != 1) { char msg[256]; snprintf(msg, sizeof(msg), - "Input vector data wrong shape. Number of dimensions %zd. Data must be a 1D or 2D array.", - (ssize_t)buffer.ndim); + "Input vector data wrong shape. Number of dimensions %lld. Data must be a 1D or 2D array.", + (long long)buffer.ndim); // use long long to avoid overflowing an int (%d) from a pybind11::ssize_t. HNSWLIB_THROW_RUNTIME_ERROR(msg); } if (buffer.ndim == 2) { @@ -135,8 +135,8 @@ inline std::vector get_input_ids_and_check_shapes(const py::object& ids_ (ids_numpy.ndim == 0 && feature_rows == 1))) { char msg[256]; snprintf(msg, sizeof(msg), - "The input label shape %zd does not match the input data vector shape %zu", - (ssize_t)ids_numpy.ndim, feature_rows); + "The input label shape %lld does not match the input data vector shape %zu", + (long long)ids_numpy.ndim, feature_rows); HNSWLIB_THROW_RUNTIME_ERROR(msg); } // extract data From 7cec19596756ef2195ce71cdf1b11769ab0bd8c8 Mon Sep 17 00:00:00 2001 From: LTLA Date: Mon, 15 Dec 2025 02:10:14 +1100 Subject: [PATCH 3/5] Silence the AVXCapable warnings. --- hnswlib/hnswlib.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h index f99644d4..686a7318 100644 --- a/hnswlib/hnswlib.h +++ b/hnswlib/hnswlib.h @@ -65,6 +65,7 @@ static uint64_t xgetbv(unsigned int index) { // Adapted from https://github.com/Mysticial/FeatureDetector #define _XCR_XFEATURE_ENABLED_MASK 0 +#if defined(USE_AVX) static bool AVXCapable() { int cpuInfo[4]; @@ -91,6 +92,7 @@ static bool AVXCapable() { } return HW_AVX && avxSupported; } +#endif #if defined(USE_AVX512) static bool AVX512Capable() { From 45a56e274e6f96cf31cdc792f75bc097df12469b Mon Sep 17 00:00:00 2001 From: LTLA Date: Mon, 15 Dec 2025 02:22:16 +1100 Subject: [PATCH 4/5] Silence more warnings that weren't otherwise covered. --- hnswlib/bruteforce.h | 1 + hnswlib/hnswalg.h | 1 + hnswlib/hnswlib.h | 7 +++++-- hnswlib/stop_condition.h | 9 +++++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hnswlib/bruteforce.h b/hnswlib/bruteforce.h index 16c266a3..288170da 100644 --- a/hnswlib/bruteforce.h +++ b/hnswlib/bruteforce.h @@ -32,6 +32,7 @@ class BruteforceSearch : public AlgorithmInterface { size_per_element_(0), data_size_(0), dist_func_param_(nullptr) { + (void)s; // silence unused variable warnings. } diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h index 0f470152..68e3520b 100644 --- a/hnswlib/hnswalg.h +++ b/hnswlib/hnswalg.h @@ -77,6 +77,7 @@ class HierarchicalNSW : public AlgorithmInterface { HierarchicalNSW(SpaceInterface *s) { + (void)s; // silence unused variable warnings. } diff --git a/hnswlib/hnswlib.h b/hnswlib/hnswlib.h index 686a7318..3fdfbb33 100644 --- a/hnswlib/hnswlib.h +++ b/hnswlib/hnswlib.h @@ -30,16 +30,19 @@ #ifdef _MSC_VER #include #include +#if defined(USE_AVX) static void cpuid(int32_t out[4], int32_t eax, int32_t ecx) { __cpuidex(out, eax, ecx); } static __int64 xgetbv(unsigned int x) { return _xgetbv(x); } +#endif #else #include #include #include +#if defined(USE_AVX) static void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) { __cpuid_count(eax, ecx, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]); } @@ -49,6 +52,7 @@ static uint64_t xgetbv(unsigned int index) { return ((uint64_t)edx << 32) | eax; } #endif +#endif #if defined(USE_AVX512) #include @@ -92,7 +96,6 @@ static bool AVXCapable() { } return HW_AVX && avxSupported; } -#endif #if defined(USE_AVX512) static bool AVX512Capable() { @@ -124,7 +127,7 @@ static bool AVX512Capable() { return HW_AVX512F && avx512Supported; } #endif - +#endif #endif #include diff --git a/hnswlib/stop_condition.h b/hnswlib/stop_condition.h index 7d8d5a3b..165951fc 100644 --- a/hnswlib/stop_condition.h +++ b/hnswlib/stop_condition.h @@ -164,6 +164,7 @@ class MultiVectorSearchStopCondition : public BaseSearchStopCondition { } void add_point_to_result(labeltype label, const void *datapoint, dist_t dist) override { + (void)label; // silence unused variable warnings. DOCIDTYPE doc_id = space_.get_doc_id(datapoint); if (doc_counter_[doc_id] == 0) { curr_num_docs_ += 1; @@ -173,6 +174,8 @@ class MultiVectorSearchStopCondition : public BaseSearchStopCondition { } void remove_point_from_result(labeltype label, const void *datapoint, dist_t dist) override { + (void)label; // silence unused variable warnings. + (void)dist; DOCIDTYPE doc_id = space_.get_doc_id(datapoint); doc_counter_[doc_id] -= 1; if (doc_counter_[doc_id] == 0) { @@ -232,10 +235,16 @@ class EpsilonSearchStopCondition : public BaseSearchStopCondition { } void add_point_to_result(labeltype label, const void *datapoint, dist_t dist) override { + (void)label; // silence unused variable warnings; + (void)datapoint; + (void)dist; curr_num_items_ += 1; } void remove_point_from_result(labeltype label, const void *datapoint, dist_t dist) override { + (void)label; // silence unused variable warnings; + (void)datapoint; + (void)dist; curr_num_items_ -= 1; } From 4f98d3b06f2c0300177eb2c1f311e52b2e506bc5 Mon Sep 17 00:00:00 2001 From: LTLA Date: Mon, 15 Dec 2025 02:32:31 +1100 Subject: [PATCH 5/5] Removed yet another unused variable. --- hnswlib/space_ip.h | 1 - 1 file changed, 1 deletion(-) diff --git a/hnswlib/space_ip.h b/hnswlib/space_ip.h index 7547c5e6..415f042b 100644 --- a/hnswlib/space_ip.h +++ b/hnswlib/space_ip.h @@ -145,7 +145,6 @@ InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const static float InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { - float PORTABLE_ALIGN64 TmpRes[16]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr);