From 239d444998f1bc403350645227414e5e47d2e55c Mon Sep 17 00:00:00 2001 From: op-hunter Date: Thu, 27 Aug 2020 19:34:49 +0800 Subject: [PATCH] =?UTF-8?q?separate=20rhnsw=20quantization=20data=20from?= =?UTF-8?q?=20index=20data=20and=20fix=20read=20compres=E2=80=A6=20(#3485)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * separate rhnsw quantization data from index data and fix read compress data bug in passing Signed-off-by: cmli * fix unittest Signed-off-by: cmli Co-authored-by: cmli --- core/src/codecs/VectorCompressFormat.cpp | 1 + core/src/db/Utils.cpp | 2 +- .../knowhere/knowhere/index/vector_index/IndexRHNSWPQ.cpp | 4 ++-- .../knowhere/knowhere/index/vector_index/IndexRHNSWSQ.cpp | 4 ++-- core/src/index/unittest/test_rhnsw_pq.cpp | 4 ++-- core/src/index/unittest/test_rhnsw_sq8.cpp | 4 ++-- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/core/src/codecs/VectorCompressFormat.cpp b/core/src/codecs/VectorCompressFormat.cpp index 83dd8d5a4..95ec385f6 100644 --- a/core/src/codecs/VectorCompressFormat.cpp +++ b/core/src/codecs/VectorCompressFormat.cpp @@ -55,6 +55,7 @@ VectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::strin return Status(SERVER_UNEXPECTED_ERROR, "Invalid vector compress length: " + full_file_path); } + compress = std::make_shared(); compress->data = std::shared_ptr(new uint8_t[length]); compress->size = length; diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index f90721d23..ee07eab07 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -180,7 +180,7 @@ RequireRawFile(const std::string& index_type) { bool RequireCompressFile(const std::string& index_type) { - return index_type == knowhere::IndexEnum::INDEX_RHNSWSQ; + return index_type == knowhere::IndexEnum::INDEX_RHNSWSQ || index_type == knowhere::IndexEnum::INDEX_RHNSWPQ; } } // namespace utils diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWPQ.cpp index 7f86083eb..cc2e8f020 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWPQ.cpp @@ -39,7 +39,7 @@ IndexRHNSWPQ::Serialize(const Config& config) { try { auto res_set = IndexRHNSW::Serialize(config); MemoryIOWriter writer; - writer.name = this->index_type() + "_Data"; + writer.name = QUANTIZATION_DATA; auto real_idx = dynamic_cast(index_.get()); if (real_idx == nullptr) { KNOWHERE_THROW_MSG("dynamic_cast(index_) failed during Serialize!"); @@ -59,7 +59,7 @@ IndexRHNSWPQ::Load(const BinarySet& index_binary) { try { IndexRHNSW::Load(index_binary); MemoryIOReader reader; - reader.name = this->index_type() + "_Data"; + reader.name = QUANTIZATION_DATA; auto binary = index_binary.GetByName(reader.name); reader.total = static_cast(binary->size); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWSQ.cpp index 300cb6663..e352d6fa4 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexRHNSWSQ.cpp @@ -42,7 +42,7 @@ IndexRHNSWSQ::Serialize(const Config& config) { try { auto res_set = IndexRHNSW::Serialize(config); MemoryIOWriter writer; - writer.name = this->index_type() + "_Data"; + writer.name = QUANTIZATION_DATA; auto real_idx = dynamic_cast(index_.get()); if (real_idx == nullptr) { KNOWHERE_THROW_MSG("dynamic_cast(index_) failed during Serialize!"); @@ -62,7 +62,7 @@ IndexRHNSWSQ::Load(const BinarySet& index_binary) { try { IndexRHNSW::Load(index_binary); MemoryIOReader reader; - reader.name = this->index_type() + "_Data"; + reader.name = QUANTIZATION_DATA; auto binary = index_binary.GetByName(reader.name); reader.total = static_cast(binary->size); diff --git a/core/src/index/unittest/test_rhnsw_pq.cpp b/core/src/index/unittest/test_rhnsw_pq.cpp index 99c0c09c6..54af49004 100644 --- a/core/src/index/unittest/test_rhnsw_pq.cpp +++ b/core/src/index/unittest/test_rhnsw_pq.cpp @@ -123,7 +123,7 @@ TEST_P(RHNSWPQTest, HNSW_serialize) { index_->Add(base_dataset, conf); auto binaryset = index_->Serialize(conf); auto bin_idx = binaryset.GetByName(index_->index_type() + "_Index"); - auto bin_dat = binaryset.GetByName(index_->index_type() + "_Data"); + auto bin_dat = binaryset.GetByName(QUANTIZATION_DATA); std::string filename_idx = "/tmp/RHNSWPQ_test_serialize_idx.bin"; std::string filename_dat = "/tmp/RHNSWPQ_test_serialize_dat.bin"; @@ -137,7 +137,7 @@ TEST_P(RHNSWPQTest, HNSW_serialize) { std::shared_ptr dat(load_dat); std::shared_ptr idx(load_idx); binaryset.Append(new_idx->index_type() + "_Index", idx, bin_idx->size); - binaryset.Append(new_idx->index_type() + "_Data", dat, bin_dat->size); + binaryset.Append(QUANTIZATION_DATA, dat, bin_dat->size); new_idx->Load(binaryset); EXPECT_EQ(new_idx->Count(), nb); diff --git a/core/src/index/unittest/test_rhnsw_sq8.cpp b/core/src/index/unittest/test_rhnsw_sq8.cpp index 79dc3e7fc..7e523ad2c 100644 --- a/core/src/index/unittest/test_rhnsw_sq8.cpp +++ b/core/src/index/unittest/test_rhnsw_sq8.cpp @@ -124,7 +124,7 @@ TEST_P(RHNSWSQ8Test, HNSW_serialize) { index_->Add(base_dataset, conf); auto binaryset = index_->Serialize(conf); auto bin_idx = binaryset.GetByName(index_->index_type() + "_Index"); - auto bin_dat = binaryset.GetByName(index_->index_type() + "_Data"); + auto bin_dat = binaryset.GetByName(QUANTIZATION_DATA); std::string filename_idx = "/tmp/RHNSWSQ_test_serialize_idx.bin"; std::string filename_dat = "/tmp/RHNSWSQ_test_serialize_dat.bin"; @@ -138,7 +138,7 @@ TEST_P(RHNSWSQ8Test, HNSW_serialize) { std::shared_ptr dat(load_dat); std::shared_ptr idx(load_idx); binaryset.Append(new_idx->index_type() + "_Index", idx, bin_idx->size); - binaryset.Append(new_idx->index_type() + "_Data", dat, bin_dat->size); + binaryset.Append(QUANTIZATION_DATA, dat, bin_dat->size); new_idx->Load(binaryset); EXPECT_EQ(new_idx->Count(), nb); -- GitLab