From 4038b0653e60ec67d4c2ef2035339b337331db93 Mon Sep 17 00:00:00 2001 From: "shengjun.li" Date: Thu, 9 Jul 2020 09:23:49 +0800 Subject: [PATCH] fix too many data copies (#2784) Signed-off-by: shengjun.li --- CHANGELOG.md | 1 + .../knowhere/index/vector_index/IndexIVF.cpp | 10 ++++------ .../knowhere/index/vector_index/IndexIVFPQ.cpp | 12 ++++++------ .../knowhere/index/vector_index/IndexIVFSQ.cpp | 8 +++----- .../knowhere/index/vector_index/IndexNSG.cpp | 4 +--- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb1a799d..12fb7064 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Please mark all change in change log and use the issue from GitHub - \#2690 Remove body parser in show-partitions endpoints - \#2692 Milvus hangs during multi-thread concurrent search - \#2739 Fix mishards start failed +- \#2776 Fix too many data copies during creating IVF index ## Feature diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 67c79774..fdd8265c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -66,13 +66,11 @@ void IVF::Train(const DatasetPtr& dataset_ptr, const Config& config) { GETTENSOR(dataset_ptr) - faiss::Index* coarse_quantizer = new faiss::IndexFlatL2(dim); - int64_t nlist = config[IndexParams::nlist].get<int64_t>(); faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>()); - auto index = std::make_shared<faiss::IndexIVFFlat>(coarse_quantizer, dim, nlist, metric_type); - index->train(rows, (float*)p_data); - - index_.reset(faiss::clone_index(index.get())); + faiss::Index* coarse_quantizer = new faiss::IndexFlat(dim, metric_type); + int64_t nlist = config[IndexParams::nlist].get<int64_t>(); + index_ = std::shared_ptr<faiss::Index>(new faiss::IndexIVFFlat(coarse_quantizer, dim, nlist, metric_type)); + index_->train(rows, (float*)p_data); } void 
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp index c6a7e4de..662fefba 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp @@ -35,13 +35,13 @@ void IVFPQ::Train(const DatasetPtr& dataset_ptr, const Config& config) { GETTENSOR(dataset_ptr) - faiss::Index* coarse_quantizer = new faiss::IndexFlat(dim, GetMetricType(config[Metric::TYPE].get<std::string>())); - auto index = std::make_shared<faiss::IndexIVFPQ>(coarse_quantizer, dim, config[IndexParams::nlist].get<int64_t>(), - config[IndexParams::m].get<int64_t>(), - config[IndexParams::nbits].get<int64_t>()); - index->train(rows, (float*)p_data); + faiss::MetricType metric_type = GetMetricType(config[Metric::TYPE].get<std::string>()); + faiss::Index* coarse_quantizer = new faiss::IndexFlat(dim, metric_type); + index_ = std::shared_ptr<faiss::Index>(new faiss::IndexIVFPQ( + coarse_quantizer, dim, config[IndexParams::nlist].get<int64_t>(), config[IndexParams::m].get<int64_t>(), + config[IndexParams::nbits].get<int64_t>(), metric_type)); - index_.reset(faiss::clone_index(index.get())); + index_->train(rows, (float*)p_data); } VecIndexPtr diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 39d26ad8..51ae44cb 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -38,11 +38,9 @@ IVFSQ::Train(const DatasetPtr& dataset_ptr, const Config& config) { std::stringstream index_type; index_type << "IVF" << config[IndexParams::nlist] << "," << "SQ" << config[IndexParams::nbits]; - auto build_index = - faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(config[Metric::TYPE].get<std::string>())); - build_index->train(rows, (float*)p_data); - - index_.reset(faiss::clone_index(build_index)); + index_ = std::shared_ptr<faiss::Index>( + faiss::index_factory(dim, 
index_type.str().c_str(), GetMetricType(config[Metric::TYPE].get<std::string>()))); + index_->train(rows, (float*)p_data); } VecIndexPtr diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 4442d056..f7812e6e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -139,9 +139,7 @@ NSG::Train(const DatasetPtr& dataset_ptr, const Config& config) { b_params.out_degree = config[IndexParams::out_degree]; b_params.search_length = config[IndexParams::search_length]; - auto p_ids = dataset_ptr->Get<const int64_t*>(meta::IDS); - - GETTENSOR(dataset_ptr) + GETTENSORWITHIDS(dataset_ptr) index_ = std::make_shared<impl::NsgIndex>(dim, rows, config[Metric::TYPE].get<std::string>()); index_->SetKnnGraph(knng); index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params); -- GitLab