未验证 提交 239d4449 编写于 作者: O op-hunter 提交者: GitHub

separate rhnsw quantization data from index data and fix read compress data bug in passing (#3485)

* separate rhnsw quantization data from index data, and fix a bug in reading compress data along the way
Signed-off-by: Ncmli <chengming.li@zilliz.com>

* fix unittest
Signed-off-by: Ncmli <chengming.li@zilliz.com>
Co-authored-by: Ncmli <chengming.li@zilliz.com>
上级 bdcd341f
...@@ -55,6 +55,7 @@ VectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::strin ...@@ -55,6 +55,7 @@ VectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::strin
return Status(SERVER_UNEXPECTED_ERROR, "Invalid vector compress length: " + full_file_path); return Status(SERVER_UNEXPECTED_ERROR, "Invalid vector compress length: " + full_file_path);
} }
compress = std::make_shared<knowhere::Binary>();
compress->data = std::shared_ptr<uint8_t[]>(new uint8_t[length]); compress->data = std::shared_ptr<uint8_t[]>(new uint8_t[length]);
compress->size = length; compress->size = length;
......
...@@ -180,7 +180,7 @@ RequireRawFile(const std::string& index_type) { ...@@ -180,7 +180,7 @@ RequireRawFile(const std::string& index_type) {
bool bool
RequireCompressFile(const std::string& index_type) { RequireCompressFile(const std::string& index_type) {
return index_type == knowhere::IndexEnum::INDEX_RHNSWSQ; return index_type == knowhere::IndexEnum::INDEX_RHNSWSQ || index_type == knowhere::IndexEnum::INDEX_RHNSWPQ;
} }
} // namespace utils } // namespace utils
......
...@@ -39,7 +39,7 @@ IndexRHNSWPQ::Serialize(const Config& config) { ...@@ -39,7 +39,7 @@ IndexRHNSWPQ::Serialize(const Config& config) {
try { try {
auto res_set = IndexRHNSW::Serialize(config); auto res_set = IndexRHNSW::Serialize(config);
MemoryIOWriter writer; MemoryIOWriter writer;
writer.name = this->index_type() + "_Data"; writer.name = QUANTIZATION_DATA;
auto real_idx = dynamic_cast<faiss::IndexRHNSWPQ*>(index_.get()); auto real_idx = dynamic_cast<faiss::IndexRHNSWPQ*>(index_.get());
if (real_idx == nullptr) { if (real_idx == nullptr) {
KNOWHERE_THROW_MSG("dynamic_cast<faiss::IndexRHNSWPQ*>(index_) failed during Serialize!"); KNOWHERE_THROW_MSG("dynamic_cast<faiss::IndexRHNSWPQ*>(index_) failed during Serialize!");
...@@ -59,7 +59,7 @@ IndexRHNSWPQ::Load(const BinarySet& index_binary) { ...@@ -59,7 +59,7 @@ IndexRHNSWPQ::Load(const BinarySet& index_binary) {
try { try {
IndexRHNSW::Load(index_binary); IndexRHNSW::Load(index_binary);
MemoryIOReader reader; MemoryIOReader reader;
reader.name = this->index_type() + "_Data"; reader.name = QUANTIZATION_DATA;
auto binary = index_binary.GetByName(reader.name); auto binary = index_binary.GetByName(reader.name);
reader.total = static_cast<size_t>(binary->size); reader.total = static_cast<size_t>(binary->size);
......
...@@ -42,7 +42,7 @@ IndexRHNSWSQ::Serialize(const Config& config) { ...@@ -42,7 +42,7 @@ IndexRHNSWSQ::Serialize(const Config& config) {
try { try {
auto res_set = IndexRHNSW::Serialize(config); auto res_set = IndexRHNSW::Serialize(config);
MemoryIOWriter writer; MemoryIOWriter writer;
writer.name = this->index_type() + "_Data"; writer.name = QUANTIZATION_DATA;
auto real_idx = dynamic_cast<faiss::IndexRHNSWSQ*>(index_.get()); auto real_idx = dynamic_cast<faiss::IndexRHNSWSQ*>(index_.get());
if (real_idx == nullptr) { if (real_idx == nullptr) {
KNOWHERE_THROW_MSG("dynamic_cast<faiss::IndexRHNSWSQ*>(index_) failed during Serialize!"); KNOWHERE_THROW_MSG("dynamic_cast<faiss::IndexRHNSWSQ*>(index_) failed during Serialize!");
...@@ -62,7 +62,7 @@ IndexRHNSWSQ::Load(const BinarySet& index_binary) { ...@@ -62,7 +62,7 @@ IndexRHNSWSQ::Load(const BinarySet& index_binary) {
try { try {
IndexRHNSW::Load(index_binary); IndexRHNSW::Load(index_binary);
MemoryIOReader reader; MemoryIOReader reader;
reader.name = this->index_type() + "_Data"; reader.name = QUANTIZATION_DATA;
auto binary = index_binary.GetByName(reader.name); auto binary = index_binary.GetByName(reader.name);
reader.total = static_cast<size_t>(binary->size); reader.total = static_cast<size_t>(binary->size);
......
...@@ -123,7 +123,7 @@ TEST_P(RHNSWPQTest, HNSW_serialize) { ...@@ -123,7 +123,7 @@ TEST_P(RHNSWPQTest, HNSW_serialize) {
index_->Add(base_dataset, conf); index_->Add(base_dataset, conf);
auto binaryset = index_->Serialize(conf); auto binaryset = index_->Serialize(conf);
auto bin_idx = binaryset.GetByName(index_->index_type() + "_Index"); auto bin_idx = binaryset.GetByName(index_->index_type() + "_Index");
auto bin_dat = binaryset.GetByName(index_->index_type() + "_Data"); auto bin_dat = binaryset.GetByName(QUANTIZATION_DATA);
std::string filename_idx = "/tmp/RHNSWPQ_test_serialize_idx.bin"; std::string filename_idx = "/tmp/RHNSWPQ_test_serialize_idx.bin";
std::string filename_dat = "/tmp/RHNSWPQ_test_serialize_dat.bin"; std::string filename_dat = "/tmp/RHNSWPQ_test_serialize_dat.bin";
...@@ -137,7 +137,7 @@ TEST_P(RHNSWPQTest, HNSW_serialize) { ...@@ -137,7 +137,7 @@ TEST_P(RHNSWPQTest, HNSW_serialize) {
std::shared_ptr<uint8_t[]> dat(load_dat); std::shared_ptr<uint8_t[]> dat(load_dat);
std::shared_ptr<uint8_t[]> idx(load_idx); std::shared_ptr<uint8_t[]> idx(load_idx);
binaryset.Append(new_idx->index_type() + "_Index", idx, bin_idx->size); binaryset.Append(new_idx->index_type() + "_Index", idx, bin_idx->size);
binaryset.Append(new_idx->index_type() + "_Data", dat, bin_dat->size); binaryset.Append(QUANTIZATION_DATA, dat, bin_dat->size);
new_idx->Load(binaryset); new_idx->Load(binaryset);
EXPECT_EQ(new_idx->Count(), nb); EXPECT_EQ(new_idx->Count(), nb);
......
...@@ -124,7 +124,7 @@ TEST_P(RHNSWSQ8Test, HNSW_serialize) { ...@@ -124,7 +124,7 @@ TEST_P(RHNSWSQ8Test, HNSW_serialize) {
index_->Add(base_dataset, conf); index_->Add(base_dataset, conf);
auto binaryset = index_->Serialize(conf); auto binaryset = index_->Serialize(conf);
auto bin_idx = binaryset.GetByName(index_->index_type() + "_Index"); auto bin_idx = binaryset.GetByName(index_->index_type() + "_Index");
auto bin_dat = binaryset.GetByName(index_->index_type() + "_Data"); auto bin_dat = binaryset.GetByName(QUANTIZATION_DATA);
std::string filename_idx = "/tmp/RHNSWSQ_test_serialize_idx.bin"; std::string filename_idx = "/tmp/RHNSWSQ_test_serialize_idx.bin";
std::string filename_dat = "/tmp/RHNSWSQ_test_serialize_dat.bin"; std::string filename_dat = "/tmp/RHNSWSQ_test_serialize_dat.bin";
...@@ -138,7 +138,7 @@ TEST_P(RHNSWSQ8Test, HNSW_serialize) { ...@@ -138,7 +138,7 @@ TEST_P(RHNSWSQ8Test, HNSW_serialize) {
std::shared_ptr<uint8_t[]> dat(load_dat); std::shared_ptr<uint8_t[]> dat(load_dat);
std::shared_ptr<uint8_t[]> idx(load_idx); std::shared_ptr<uint8_t[]> idx(load_idx);
binaryset.Append(new_idx->index_type() + "_Index", idx, bin_idx->size); binaryset.Append(new_idx->index_type() + "_Index", idx, bin_idx->size);
binaryset.Append(new_idx->index_type() + "_Data", dat, bin_dat->size); binaryset.Append(QUANTIZATION_DATA, dat, bin_dat->size);
new_idx->Load(binaryset); new_idx->Load(binaryset);
EXPECT_EQ(new_idx->Count(), nb); EXPECT_EQ(new_idx->Count(), nb);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册