Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 5 additions & 12 deletions src/core/algorithm/hnsw/hnsw_chunk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace zvec {
namespace core {

int ChunkBroker::init_storage(size_t chunk_size) {
int ChunkBroker::init_storage(uint32_t chunk_size) {
chunk_meta_.clear();
chunk_meta_.chunk_size = chunk_size;
chunk_meta_.create_time = ailego::Realtime::Seconds();
Expand Down Expand Up @@ -61,7 +61,7 @@ int ChunkBroker::init_storage(size_t chunk_size) {
return 0;
}

int ChunkBroker::load_storage(size_t chunk_size) {
int ChunkBroker::load_storage(uint32_t &chunk_size) {
IndexStorage::MemoryBlock data_block;
size_t size = chunk_meta_segment_->read(0UL, data_block,
chunk_meta_segment_->data_size());
Expand All @@ -71,13 +71,7 @@ int ChunkBroker::load_storage(size_t chunk_size) {
return IndexError_InvalidFormat;
}
std::memcpy(&chunk_meta_, data_block.data(), size);
if (chunk_meta_.chunk_size != chunk_size) {
LOG_ERROR(
"Params hnsw chunk size=%zu mismatch from previous %zu "
"in index",
chunk_size, (size_t)chunk_meta_.chunk_size);
return IndexError_Mismatch;
}
chunk_size = chunk_meta_.chunk_size;

*stats_.mutable_check_point() = stg_->check_point();
stats_.set_revision_id(chunk_meta_.revision_id);
Expand All @@ -102,8 +96,8 @@ int ChunkBroker::load_storage(size_t chunk_size) {
return 0;
}

int ChunkBroker::open(IndexStorage::Pointer stg, size_t max_index_size,
size_t chunk_size, bool check_crc) {
int ChunkBroker::open(IndexStorage::Pointer stg, uint32_t &chunk_size,
bool check_crc) {
if (ailego_unlikely(stg_)) {
LOG_ERROR("An storage instance is already opened");
return IndexError_Duplicate;
Expand All @@ -115,7 +109,6 @@ int ChunkBroker::open(IndexStorage::Pointer stg, size_t max_index_size,
page_mask_ = ailego::MemoryHelper::PageSize() - 1;
}
check_crc_ = check_crc;
max_chunks_size_ = max_index_size;
dirty_ = false;

const std::string segment_id =
Expand Down
11 changes: 7 additions & 4 deletions src/core/algorithm/hnsw/hnsw_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ class ChunkBroker {
//! Constructor
//! Initialises the stats_ member from the caller-supplied `stats` object.
//! NOTE(review): stats_ is presumably held by reference, so `stats` would
//! have to outlive this broker -- confirm against the member declaration.
ChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {}

//! Open storage
int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,
bool check_crc);
int open(IndexStorage::Pointer stg, uint32_t &chunk_size, bool check_crc);

int close(void);

Expand Down Expand Up @@ -88,6 +87,10 @@ class ChunkBroker {
return stg_;
}

void set_max_chunks_size(size_t max_chunks_size) {
max_chunks_size_ = max_chunks_size;
}

private:
ChunkBroker(const ChunkBroker &) = delete;
ChunkBroker &operator=(const ChunkBroker &) = delete;
Expand All @@ -113,10 +116,10 @@ class ChunkBroker {
"HnswChunkMeta must be aligned with 32 bytes");

//! Init the storage after opening an empty index
int init_storage(size_t chunk_size);
int init_storage(uint32_t chunk_size);

//! Load index from storage
int load_storage(size_t chunk_size);
int load_storage(uint32_t &chunk_size);

static inline const std::string make_segment_id(int type, uint64_t seq_id) {
return "HnswT" + ailego::StringHelper::ToString(type) + "S" +
Expand Down
39 changes: 34 additions & 5 deletions src/core/algorithm/hnsw/hnsw_dist_calculator.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,14 @@ class HnswDistCalculator {
//! Return distance between query and node id.
inline dist_t dist(node_id_t id) {
compare_cnt_++;

const void *feat = entity_->get_vector(id);
IndexStorage::MemoryBlock vec_block;
int ret = entity_->get_vector(id, vec_block);
if (ailego_unlikely(ret != 0)) {
LOG_ERROR("Get nullptr vector, id=%u", id);
error_ = true;
return 0.0f;
}
const void *feat = vec_block.data();
if (ailego_unlikely(feat == nullptr)) {
LOG_ERROR("Get nullptr vector, id=%u", id);
error_ = true;
Expand All @@ -130,8 +136,24 @@ class HnswDistCalculator {
inline dist_t dist(node_id_t lhs, node_id_t rhs) {
compare_cnt_++;

const void *feat = entity_->get_vector(lhs);
const void *query = entity_->get_vector(rhs);

IndexStorage::MemoryBlock vec_block_feat;
int ret = entity_->get_vector(lhs, vec_block_feat);
if (ailego_unlikely(ret != 0)) {
LOG_ERROR("Get nullptr vector, id=%u", lhs);
error_ = true;
return 0.0f;
}
const void *feat = vec_block_feat.data();

IndexStorage::MemoryBlock vec_block_query;
ret = entity_->get_vector(rhs, vec_block_query);
if (ailego_unlikely(ret != 0)) {
LOG_ERROR("Get nullptr vector, id=%u", rhs);
error_ = true;
return 0.0f;
}
const void *query = vec_block_query.data();
if (ailego_unlikely(feat == nullptr || query == nullptr)) {
LOG_ERROR("Get nullptr vector");
error_ = true;
Expand Down Expand Up @@ -162,7 +184,14 @@ class HnswDistCalculator {
inline dist_t batch_dist(node_id_t id) {
compare_cnt_++;

const void *feat = entity_->get_vector(id);
IndexStorage::MemoryBlock vec_block;
int ret = entity_->get_vector(id, vec_block);
if (ailego_unlikely(ret != 0)) {
LOG_ERROR("Get nullptr vector, id=%u", id);
error_ = true;
return 0.0f;
}
const void *feat = vec_block.data();
if (ailego_unlikely(feat == nullptr)) {
LOG_ERROR("Get nullptr vector, id=%u", id);
error_ = true;
Expand Down
2 changes: 1 addition & 1 deletion src/core/algorithm/hnsw/hnsw_entity.h
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ class HnswEntity {
constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U; // 1 billion
constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;
constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;
constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;
constexpr static size_t kDefaultChunkSize = 2 * 1024UL * 1024UL;
constexpr static size_t kDefaultMaxChunkCnt = 50000UL;
constexpr static float kDefaultNeighborPruneMultiplier =
1.0f; // prune_cnt = upper_max_neighbor_cnt * multiplier
Expand Down
Loading
Loading