Skip to content
Open
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ tags
compile_commands.json
.python-version
.vscode
.cache

# Python related files
__pycache__/
Expand Down
195 changes: 195 additions & 0 deletions bindings/python/include/svs/python/ivf_loader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
/*
* Copyright 2026 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// svs
#include "svs/core/distance.h"
#include "svs/index/ivf/data_traits.h"
#include "svs/lib/datatype.h"
#include "svs/lib/exception.h"
#include "svs/lib/float16.h"
#include "svs/lib/saveload.h"

// toml
#include <toml++/toml.h>

// stl
#include <filesystem>
#include <string>

namespace svs::python::ivf_loader {

///
/// @brief Rebind an allocator to a different type.
///
template <typename Allocator, typename T>
using RebindAlloc = typename std::allocator_traits<Allocator>::template rebind_alloc<T>;

///
/// @brief Read and parse the data type configuration from a saved IVF index.
///
/// @param config_path Path to the configuration directory
/// @return The parsed DataTypeConfig
///
inline svs::index::ivf::DataTypeConfig read_data_type_config(const std::string& config_path
) {
auto config_file = std::filesystem::path(config_path) / svs::lib::config_file_name;
auto table = toml::parse_file(config_file.string());

// The data_type_config is nested inside "object" section
auto object_node = table["object"];
if (!object_node) {
throw ANNEXCEPTION("Config file missing 'object' section.");
}
auto* object_table = object_node.as_table();
if (!object_table) {
throw ANNEXCEPTION("'object' section is not a table.");
}

// Get the data_type_config section from object
auto data_type_node = (*object_table)["data_type_config"];
if (!data_type_node) {
throw ANNEXCEPTION("Config file missing 'data_type_config' section.");
}

// Convert to table and create ContextFreeLoadTable
auto* data_type_table = data_type_node.as_table();
if (!data_type_table) {
throw ANNEXCEPTION("data_type_config is not a table");
}
auto ctx_free = svs::lib::ContextFreeLoadTable(*data_type_table);
return svs::index::ivf::DataTypeConfig::load(ctx_free);
}

///
/// @brief Generic loader function template for IVF index assembly.
///
/// This template reduces boilerplate by providing a generic loader that can be
/// instantiated with different centroid and data types.
///
/// @tparam IndexType The IVF index type (svs::IVF or svs::DynamicIVF)
/// @tparam CentroidType The centroid type (Float16 or BFloat16)
/// @tparam DataType The data type for the clusters
///
template <typename IndexType, typename CentroidType, typename DataType>
IndexType load_typed(
const std::string& config_path,
const std::string& data_path,
svs::DistanceType distance_type,
size_t num_threads,
size_t intra_query_threads
) {
return IndexType::template assemble<float, CentroidType, DataType>(
config_path, data_path, distance_type, num_threads, intra_query_threads
);
}

///
/// @brief Loader for uncompressed IVF data with type dispatch.
///
/// Dispatches to the appropriate loader based on element type and centroid type.
///
/// @tparam IndexType The type of index to return
/// @tparam DataContainer The data container template (SimpleData or BlockedData)
/// @tparam Allocator The allocator type for the data (will be rebound to element type)
///
template <
typename IndexType,
template <typename, size_t, typename>
class DataContainer,
typename Allocator>
IndexType load_uncompressed_with_dispatch(
const std::string& config_path,
const std::string& data_path,
svs::DistanceType distance_type,
size_t num_threads,
size_t intra_query_threads,
const svs::index::ivf::DataTypeConfig& data_config
) {
bool is_f16_centroids = (data_config.centroid_type == svs::DataType::float16);
bool is_f16_data = (data_config.element_type == svs::DataType::float16);

// Dispatch based on data type and centroid type combinations
// Rebind the allocator to the appropriate element type
if (is_f16_data) {
using ReboundAlloc = RebindAlloc<Allocator, svs::Float16>;
using DataType = DataContainer<svs::Float16, svs::Dynamic, ReboundAlloc>;
if (is_f16_centroids) {
return load_typed<IndexType, svs::Float16, DataType>(
config_path, data_path, distance_type, num_threads, intra_query_threads
);
} else {
return load_typed<IndexType, svs::BFloat16, DataType>(
config_path, data_path, distance_type, num_threads, intra_query_threads
);
}
} else {
using ReboundAlloc = RebindAlloc<Allocator, float>;
using DataType = DataContainer<float, svs::Dynamic, ReboundAlloc>;
if (is_f16_centroids) {
return load_typed<IndexType, svs::Float16, DataType>(
config_path, data_path, distance_type, num_threads, intra_query_threads
);
} else {
return load_typed<IndexType, svs::BFloat16, DataType>(
config_path, data_path, distance_type, num_threads, intra_query_threads
);
}
}
}

///
/// @brief Generic IVF index loader with type dispatch based on saved configuration.
///
/// @tparam IndexType The type of index to return (svs::IVF or svs::DynamicIVF)
/// @tparam DataContainer The data container template (SimpleData or BlockedData)
/// @tparam Allocator The allocator type for uncompressed data
///
template <
typename IndexType,
template <typename, size_t, typename>
class DataContainer,
typename Allocator>
IndexType load_index_auto(
const std::string& config_path,
const std::string& data_path,
svs::DistanceType distance_type,
size_t num_threads,
size_t intra_query_threads
) {
auto data_config = read_data_type_config(config_path);

// Dispatch based on schema - only uncompressed supported in public repo
if (data_config.schema == "uncompressed_data") {
return load_uncompressed_with_dispatch<IndexType, DataContainer, Allocator>(
config_path,
data_path,
distance_type,
num_threads,
intra_query_threads,
data_config
);
}

throw ANNEXCEPTION(
"Unknown or unsupported data type schema: ",
data_config.schema,
". Only uncompressed data is supported in the public repository. "
);
}

} // namespace svs::python::ivf_loader
52 changes: 52 additions & 0 deletions bindings/python/src/dynamic_ivf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@
#include "svs/python/common.h"
#include "svs/python/core.h"
#include "svs/python/ivf.h"
#include "svs/python/ivf_loader.h"
#include "svs/python/manager.h"

// svs
#include "svs/index/ivf/data_traits.h"
#include "svs/lib/dispatcher.h"
#include "svs/lib/saveload.h"
#include "svs/orchestrators/dynamic_ivf.h"

// toml
#include <toml++/toml.h>

// pybind
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
Expand All @@ -34,6 +40,7 @@
#include <fmt/format.h>

// stl
#include <filesystem>
#include <span>

/////
Expand Down Expand Up @@ -342,6 +349,20 @@ void save_index(
index.save(config_path, data_dir);
}

// Load with auto-detection from saved config using common template dispatcher
svs::DynamicIVF load_index_auto(
const std::string& config_path,
const std::string& data_path,
svs::DistanceType distance_type,
size_t num_threads,
size_t intra_query_threads = 1
) {
return svs::python::ivf_loader::
load_index_auto<svs::DynamicIVF, svs::data::BlockedData, Allocator>(
config_path, data_path, distance_type, num_threads, intra_query_threads
);
}

void wrap(py::module& m) {
std::string name = "DynamicIVF";
py::class_<svs::DynamicIVF> dynamic_ivf(
Expand Down Expand Up @@ -530,6 +551,37 @@ It is the caller's responsibility to ensure that no existing data will be
overwritten when saving the index to this directory.
)"
);

// Loading
dynamic_ivf.def_static(
"load",
&load_index_auto,
py::arg("config_directory"),
py::arg("data_directory"),
py::arg("distance") = svs::L2,
py::arg("num_threads") = 1,
py::arg("intra_query_threads") = 1,
R"(
Load a saved DynamicIVF index from disk.

The data type (uncompressed with float32 or float16) and centroid type (bfloat16 or float16)
are automatically detected from the saved configuration file.

Args:
config_directory: Directory where index configuration was saved.
data_directory: Directory where the dataset was saved.
distance: The distance function to use.
num_threads: The number of threads to use for queries.
intra_query_threads: Number of threads for intra-query parallelism (default: 1).

Returns:
A loaded DynamicIVF index ready for searching and modifications.

Note:
This method auto-detects the data type from the saved configuration.
The index must have been saved with a version that includes data type information.
)"
);
}

} // namespace svs::python::dynamic_ivf
Loading
Loading