MS-568 Fix GpuResource free error

Former-commit-id: 52acd35e93a5293c70c45bb681fc54046b43a2cb

MS-568 Fix GpuResource free error
Former-commit-id: 52acd35e93a5293c70c45bb681fc54046b43a2cb
02270a62 · xiaojun.lin · aec55278 · 02270a62 · 02270a62 · 02270a62
10 changed files
--- a/cpp/CHANGELOG.md
+++ b/cpp/CHANGELOG.md
@@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA.
 # Milvus 0.5.0 (TODO)

 ## Bug
+- MS-568 - Fix GpuResource free error

 ## Improvement
 - MS-552 - Add and change the easylogging library

--- a/cpp/src/core/knowhere/CMakeLists.txt
+++ b/cpp/src/core/knowhere/CMakeLists.txt
@@ -48,6 +48,7 @@ set(index_srcs
        knowhere/index/vector_index/nsg/nsg_io.cpp
        knowhere/index/vector_index/nsg/utils.cpp
        knowhere/index/vector_index/cloner.cpp
+        knowhere/index/vector_index/FaissGpuResourceMgr.cpp
        )

 set(depend_libs

--- a/cpp/src/core/knowhere/knowhere/index/vector_index/FaissGpuResourceMgr.cpp
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/FaissGpuResourceMgr.cpp
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#include "FaissGpuResourceMgr.h"
+
+
+namespace zilliz {
+namespace knowhere {
+
+// Access the single, process-wide resource manager.
+// The manager is a function-local static, so it is created on the first call.
+FaissGpuResourceMgr &FaissGpuResourceMgr::GetInstance() {
+    static FaissGpuResourceMgr mgr;
+    return mgr;
+}
+
+// Configure the temporary-memory size of the faiss resources wrapped by
+// `resource`.
+//
+// A non-zero `size` is applied directly. When `size` is 0 the temp_mem_size
+// registered for `device_id` through InitDevice() is applied instead; if the
+// device was never registered, nothing is configured here.
+void FaissGpuResourceMgr::AllocateTempMem(ResPtr &resource,
+                                          const int64_t &device_id,
+                                          const int64_t &size) {
+    if (size) {
+        resource->faiss_res->setTempMemory(size);
+        return;
+    }
+
+    const auto params_it = devices_params_.find(device_id);
+    if (params_it == devices_params_.end()) {
+        // Unknown device: do nothing, allocate when used.
+        return;
+    }
+    resource->faiss_res->setTempMemory(params_it->second.temp_mem_size);
+}
+
+// Register the configuration of one GPU device.
+//
+// The values are only recorded here; the resources themselves are created
+// later by InitResource(). Because the entry is added with emplace(), a device
+// id that is already registered keeps its existing configuration.
+void FaissGpuResourceMgr::InitDevice(int64_t device_id,
+                                     int64_t pin_mem_size,
+                                     int64_t temp_mem_size,
+                                     int64_t res_num) {
+    DeviceParams device_config;
+    device_config.resource_num = res_num;
+    device_config.temp_mem_size = temp_mem_size;
+    device_config.pinned_mem_size = pin_mem_size;
+
+    devices_params_.emplace(device_id, device_config);
+}
+
+// Build the per-device mutex and the pool of idle GPU resources for every
+// device registered through InitDevice().
+//
+// Only the first call (or the first call after Free()) does any work; later
+// calls return immediately. For each device, `resource_num` resources are
+// created, their temp memory is configured from the device parameters, and
+// they are placed in that device's idle queue.
+void FaissGpuResourceMgr::InitResource() {
+    // GetRes() invokes this on every request, so the check-and-set of is_init
+    // and the population of the maps must not interleave between callers.
+    // The lock also makes late callers wait until the pool is fully built.
+    static std::mutex init_mutex;
+    std::lock_guard<std::mutex> init_guard(init_mutex);
+
+    if (is_init) {
+        return;
+    }
+    is_init = true;
+
+    for (auto &device : devices_params_) {
+        const auto &device_id = device.first;
+        const auto &device_param = device.second;
+
+        // emplace() keeps an already existing mutex (e.g. after Free()).
+        mutex_cache_.emplace(device_id, std::make_unique<std::mutex>());
+
+        auto &bq = idle_map_[device_id];
+
+        for (int64_t i = 0; i < device_param.resource_num; ++i) {
+            auto raw_resource = std::make_shared<faiss::gpu::StandardGpuResources>();
+
+            // TODO(linxj): enable set pinned memory
+            auto res_wrapper = std::make_shared<Resource>(raw_resource);
+            // size 0 => use the temp_mem_size registered for this device.
+            AllocateTempMem(res_wrapper, device_id, 0);
+
+            bq.Put(res_wrapper);
+        }
+    }
+}
+
+// Take one resource out of the idle queue of `device_id`.
+//
+// Makes sure the pools exist (InitResource()), then takes a resource from the
+// device's queue and configures its temp memory via AllocateTempMem() with
+// `alloc_size`. Returns nullptr when the device has no idle queue, i.e. it was
+// not registered before the pools were built.
+// NOTE(review): Take() presumably blocks while the queue is empty - confirm
+// against BlockingQueue.
+ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id,
+                                   const int64_t &alloc_size) {
+    InitResource();
+
+    const auto queue_it = idle_map_.find(device_id);
+    if (queue_it == idle_map_.end()) {
+        return nullptr;
+    }
+
+    ResPtr taken = queue_it->second.Take();
+    AllocateTempMem(taken, device_id, alloc_size);
+    return taken;
+}
+
+// Hand `res` back to the idle queue of `device_id`.
+// Nothing happens when the device has no idle queue.
+void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res) {
+    const auto queue_it = idle_map_.find(device_id);
+    if (queue_it == idle_map_.end()) {
+        return;
+    }
+    queue_it->second.Put(res);
+}
+
+// Drop every resource that currently sits in an idle queue and mark the
+// manager as uninitialized, so a later InitResource() rebuilds the pools.
+// Resources that are checked out at the time of the call are not touched.
+void FaissGpuResourceMgr::Free() {
+    for (auto &entry : idle_map_) {
+        auto &idle_queue = entry.second;
+        // Taking an element and discarding it releases the queue's reference.
+        while (!idle_queue.Empty()) {
+            idle_queue.Take();
+        }
+    }
+
+    is_init = false;
+}
+
+// Print, for every device, its id and the number of resources currently in
+// its idle queue to std::cout, one device per line.
+void
+FaissGpuResourceMgr::Dump() {
+    for (auto &item : idle_map_) {
+        auto& bq = item.second;
+        // Terminate each entry with a newline; without it the entries of all
+        // devices run together on a single line.
+        std::cout << "device_id: " << item.first
+                  << ", resource count:" << bq.Size() << '\n';
+    }
+}
+
+} // knowhere
+} // zilliz
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/FaissGpuResourceMgr.h
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/FaissGpuResourceMgr.h
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <map>
+
+#include <faiss/gpu/StandardGpuResources.h>
+
+#include "src/utils/BlockingQueue.h"
+
+namespace zilliz {
+namespace knowhere {
+
+// One pooled unit of faiss GPU resources together with the mutex that guards
+// its use and a numeric id assigned at construction.
+struct Resource {
+    // Shares ownership of `r`. The pointer is only copied, never modified, so
+    // it is taken by const reference (temporaries bind as well as lvalues).
+    explicit Resource(const std::shared_ptr<faiss::gpu::StandardGpuResources> &r) : faiss_res(r) {
+        // Plain (non-atomic) counter: ids are unique only as long as Resource
+        // objects are not constructed from several threads at the same time.
+        static int64_t global_id = 0;
+        id = global_id++;
+    }
+
+    std::shared_ptr<faiss::gpu::StandardGpuResources> faiss_res;  // wrapped faiss resources
+    int64_t id;                                                   // sequence number, starts at 0
+    std::mutex mutex;                                             // held by ResScope while the resource is in use
+};
+using ResPtr = std::shared_ptr<Resource>;
+using ResWPtr = std::weak_ptr<Resource>;
+
+// Singleton that owns, per GPU device, a queue of idle faiss GPU resources
+// and a device-wide mutex. Resources are handed out with GetRes() and given
+// back with MoveToIdle(); ResScope does the locking and the hand-back.
+class FaissGpuResourceMgr {
+public:
+    // ResScope reads mutex_cache_ directly.
+    friend class ResScope;
+    using ResBQ = zilliz::milvus::server::BlockingQueue<ResPtr>;
+
+public:
+    // Per-device configuration recorded by InitDevice().
+    struct DeviceParams {
+        int64_t temp_mem_size = 0;    // passed to setTempMemory() when no explicit size is requested
+        int64_t pinned_mem_size = 0;  // recorded only; not applied yet (see TODO in InitResource)
+        int64_t resource_num = 2;     // number of resources created for the device
+    };
+
+public:
+    // Returns the process-wide instance.
+    static FaissGpuResourceMgr &
+    GetInstance();
+
+    // Frees the idle GPU resources to avoid a cudaGetDevice error on
+    // deallocation. This function should be invoked before main returns.
+    // It also resets the initialized flag.
+    void
+    Free();
+
+    // Sets the temp memory of `resource`: `size` if non-zero, otherwise the
+    // temp_mem_size registered for `device_id` (nothing if it is unregistered).
+    void
+    AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size);
+
+    // Records the configuration of a device. An already registered device id
+    // keeps its existing configuration.
+    void
+    InitDevice(int64_t device_id,
+               int64_t pin_mem_size = 0,
+               int64_t temp_mem_size = 0,
+               int64_t res_num = 2);
+
+    // Creates the device mutexes and idle resources for all registered
+    // devices. Does nothing if the manager is already initialized.
+    void
+    InitResource();
+
+    // Allocates GPU memory; invoked by build or copy_to_gpu.
+    // Takes a resource from the idle queue of `device_id` and returns it, or
+    // returns nullptr if the device has no idle queue.
+    ResPtr
+    GetRes(const int64_t &device_id, const int64_t& alloc_size = 0);
+
+    // Puts `res` back into the idle queue of `device_id`; no-op if the device
+    // has no idle queue.
+    void
+    MoveToIdle(const int64_t &device_id, const ResPtr& res);
+
+    // Writes each device id and its idle-resource count to std::cout.
+    void
+    Dump();
+
+protected:
+    // True once InitResource() has run; reset by Free().
+    bool is_init = false;
+
+    // device id -> mutex guarding exclusive use of that device.
+    std::map<int64_t ,std::unique_ptr<std::mutex>> mutex_cache_;
+    // device id -> configuration recorded by InitDevice().
+    std::map<int64_t, DeviceParams> devices_params_;
+    // device id -> queue of resources that are currently not checked out.
+    std::map<int64_t, ResBQ> idle_map_;
+};
+
+// Scoped guard over one GPU resource and, optionally, its whole device.
+//
+// Construction locks the device mutex (only when the scope owns the device)
+// and then the resource mutex. Destruction releases both in reverse order and,
+// for scopes created from a ResPtr, returns the resource to the idle queue
+// once no lock is held any more.
+class ResScope {
+public:
+    // For a resource obtained from FaissGpuResourceMgr::GetRes(): the resource
+    // is moved back to the idle queue on destruction. `isown` selects whether
+    // the device mutex is held for the lifetime of the scope as well.
+    ResScope(ResPtr &res, const int64_t& device_id, const bool& isown)
+            : resource(res), device_id(device_id), move(true), own(isown) {
+        Lock();
+    }
+
+    // Specific to search: takes ownership of both the GPU resource and the
+    // device, and does not move the resource to the idle queue afterwards.
+    // If `res` has expired the scope holds no resource; only the device mutex
+    // is locked in that case.
+    ResScope(ResWPtr &res, const int64_t &device_id)
+        : resource(res.lock()), device_id(device_id), move(false), own(true) {
+        Lock();
+    }
+
+    // The scope owns lock state; a copy would unlock the same mutexes twice.
+    ResScope(const ResScope &) = delete;
+    ResScope &operator=(const ResScope &) = delete;
+
+    // Acquires the device mutex (if owned) and then the resource mutex.
+    // Called by the constructors.
+    void Lock() {
+        if (own) {
+            // Use find() rather than operator[]: operator[] would insert a
+            // null entry for an unknown device and then dereference it.
+            auto &cache = FaissGpuResourceMgr::GetInstance().mutex_cache_;
+            auto finder = cache.find(device_id);
+            if (finder != cache.end() && finder->second) {
+                device_mutex = finder->second.get();
+                device_mutex->lock();
+            }
+        }
+        if (resource) resource->mutex.lock();
+    }
+
+    ~ResScope() {
+        // Release in reverse order of acquisition.
+        if (resource) resource->mutex.unlock();
+        if (device_mutex) device_mutex->unlock();
+        // Return the resource only after its mutex is free, so the next taker
+        // never receives a resource that is still locked by this scope.
+        if (move && resource) FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource);
+    }
+
+private:
+    ResPtr resource; // hold resource until destruction
+    int64_t device_id;
+    bool move = true;
+    bool own = false;
+    std::mutex *device_mutex = nullptr; // device mutex actually locked by Lock(), if any
+};
+
+} // knowhere
+} // zilliz
\ No newline at end of file
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/gpu_ivf.cpp
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/gpu_ivf.cpp
@@ -67,9 +67,9 @@ void GPUIVF::set_index_model(IndexModelPtr model) {
    auto host_index = std::static_pointer_cast<IVFIndexModel>(model);
    if (auto gpures = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
        ResScope rs(gpures, gpu_id_, false);
-        res_ = gpures;
-        auto device_index = faiss::gpu::index_cpu_to_gpu(res_->faiss_res.get(), gpu_id_, host_index->index_.get());
+        auto device_index = faiss::gpu::index_cpu_to_gpu(gpures->faiss_res.get(), gpu_id_, host_index->index_.get());
        index_.reset(device_index);
+        res_ = gpures;
    } else {
        KNOWHERE_THROW_MSG("load index model error, can't get gpu_resource");
    }
@@ -114,9 +114,9 @@ void GPUIVF::LoadImpl(const BinarySet &index_binary) {

        if (auto temp_res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
            ResScope rs(temp_res, gpu_id_, false);
-            res_ = temp_res;
-            auto device_index = faiss::gpu::index_cpu_to_gpu(res_->faiss_res.get(), gpu_id_, index);
+            auto device_index = faiss::gpu::index_cpu_to_gpu(temp_res->faiss_res.get(), gpu_id_, index);
            index_.reset(device_index);
+            res_ = temp_res;
        } else {
            KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
        }
@@ -176,12 +176,13 @@ VectorIndexPtr GPUIVF::CopyGpuToGpu(const int64_t &device_id, const Config &conf
    auto host_index = CopyGpuToCpu(config);
    return std::static_pointer_cast<IVF>(host_index)->CopyCpuToGpu(device_id, config);
 }
+
 void GPUIVF::Add(const DatasetPtr &dataset, const Config &config) {
-    auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_);
-    if (temp_resource != nullptr) {
-        ResScope rs(temp_resource, gpu_id_, true);
+    if (auto spt = res_.lock()) {
+        ResScope rs(res_, gpu_id_);
        IVF::Add(dataset, config);
-    } else {
+    }
+    else {
        KNOWHERE_THROW_MSG("Add IVF can't get gpu resource");
    }
 }
@@ -264,108 +265,6 @@ VectorIndexPtr GPUIVFSQ::CopyGpuToCpu(const Config &config) {
    return std::make_shared<IVFSQ>(new_index);
 }

-FaissGpuResourceMgr &FaissGpuResourceMgr::GetInstance() {
-    static FaissGpuResourceMgr instance;
-    return instance;
-}
-
-void FaissGpuResourceMgr::AllocateTempMem(ResPtr &resource,
-                                          const int64_t &device_id,
-                                          const int64_t &size) {
-    if (size) {
-        resource->faiss_res->setTempMemory(size);
-    }
-    else {
-        auto search = devices_params_.find(device_id);
-        if (search != devices_params_.end()) {
-            resource->faiss_res->setTempMemory(search->second.temp_mem_size);
-        }
-        // else do nothing. allocate when use.
-    }
-}
-
-void FaissGpuResourceMgr::InitDevice(int64_t device_id,
-                                     int64_t pin_mem_size,
-                                     int64_t temp_mem_size,
-                                     int64_t res_num) {
-    DeviceParams params;
-    params.pinned_mem_size = pin_mem_size;
-    params.temp_mem_size = temp_mem_size;
-    params.resource_num = res_num;
-
-    devices_params_.emplace(device_id, params);
-}
-
-void FaissGpuResourceMgr::InitResource() {
-    if(is_init) return ;
-
-    is_init = true;
-
-    //std::cout << "InitResource" << std::endl;
-    for(auto& device : devices_params_) {
-        auto& device_id = device.first;
-
-        mutex_cache_.emplace(device_id, std::make_unique<std::mutex>());
-
-        //std::cout << "Device Id: " << device_id << std::endl;
-        auto& device_param = device.second;
-        auto& bq = idle_map_[device_id];
-
-        for (int64_t i = 0; i < device_param.resource_num; ++i) {
-            //std::cout << "Resource Id: " << i << std::endl;
-            auto raw_resource = std::make_shared<faiss::gpu::StandardGpuResources>();
-
-            // TODO(linxj): enable set pinned memory
-            auto res_wrapper = std::make_shared<Resource>(raw_resource);
-            AllocateTempMem(res_wrapper, device_id, 0);
-
-            bq.Put(res_wrapper);
-        }
-    }
-    //std::cout << "End initResource" << std::endl;
-}
-
-ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id,
-                                   const int64_t &alloc_size) {
-    InitResource();
-
-    auto finder = idle_map_.find(device_id);
-    if (finder != idle_map_.end()) {
-        auto& bq = finder->second;
-        auto&& resource = bq.Take();
-        AllocateTempMem(resource, device_id, alloc_size);
-        return resource;
-    }
-    return nullptr;
-}
-
-void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res) {
-    auto finder = idle_map_.find(device_id);
-    if (finder != idle_map_.end()) {
-        auto& bq = finder->second;
-        bq.Put(res);
-    }
-}
-
-void FaissGpuResourceMgr::Free() {
-    for (auto &item : idle_map_) {
-        auto& bq = item.second;
-        while (!bq.Empty()) {
-            bq.Take();
-        }
-    }
-    is_init = false;
-}
-
-void
-FaissGpuResourceMgr::Dump() {
-    for (auto &item : idle_map_) {
-        auto& bq = item.second;
-        std::cout << "device_id: " << item.first
-                  << ", resource count:" << bq.Size();
-    }
-}
-
 void GPUIndex::SetGpuDevice(const int &gpu_id) {
    gpu_id_ = gpu_id;
 }

--- a/cpp/src/core/knowhere/knowhere/index/vector_index/gpu_ivf.h
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/gpu_ivf.h
@@ -18,118 +18,18 @@

 #pragma once

-#include <faiss/gpu/StandardGpuResources.h>

 #include "ivf.h"
-#include "src/utils/BlockingQueue.h"
+#include "FaissGpuResourceMgr.h"


 namespace zilliz {
 namespace knowhere {

-struct Resource {
-    explicit Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r): faiss_res(r) {
-        static int64_t global_id = 0;
-        id = global_id++;
-    }
-
-    std::shared_ptr<faiss::gpu::StandardGpuResources> faiss_res;
-    int64_t id;
-    std::mutex mutex;
-};
-using ResPtr = std::shared_ptr<Resource>;
-using ResWPtr = std::weak_ptr<Resource>;
-
-class FaissGpuResourceMgr {
- public:
-    friend class ResScope;
-
- public:
-    using ResBQ = zilliz::milvus::server::BlockingQueue<ResPtr>;
-
-    struct DeviceParams {
-        int64_t temp_mem_size = 0;
-        int64_t pinned_mem_size = 0;
-        int64_t resource_num = 2;
-    };
-
- public:
-    static FaissGpuResourceMgr &
-    GetInstance();
-
-    // Free gpu resource, avoid cudaGetDevice error when deallocate.
-    // this func should be invoke before main return
-    void
-    Free();
-
-    void
-    AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size);
-
-    void
-    InitDevice(int64_t device_id,
-               int64_t pin_mem_size = 0,
-               int64_t temp_mem_size = 0,
-               int64_t res_num = 2);
-
-    void
-    InitResource();
-
-    // allocate gpu memory invoke by build or copy_to_gpu
-    ResPtr
-    GetRes(const int64_t &device_id, const int64_t& alloc_size = 0);
-
-    // allocate gpu memory before search
-    // this func will return True if the device is idle and exists an idle resource.
-    //bool
-    //GetRes(const int64_t& device_id, ResPtr &res, const int64_t& alloc_size = 0);
-
-    void
-    MoveToIdle(const int64_t &device_id, const ResPtr& res);
-
-    void
-    Dump();
-
- protected:
-    bool is_init = false;
-
-    std::map<int64_t ,std::unique_ptr<std::mutex>> mutex_cache_;
-    std::map<int64_t, DeviceParams> devices_params_;
-    std::map<int64_t, ResBQ> idle_map_;
-};
-
-class ResScope {
- public:
-    ResScope(ResPtr &res, const int64_t& device_id, const bool& isown)
-        : resource(res), device_id(device_id), move(true), own(isown) {
-        if (isown) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock();
-        res->mutex.lock();
-    }
-
-    // specif for search
-    // get the ownership of gpuresource and gpu
-    ResScope(ResPtr &res, const int64_t &device_id)
-        : resource(res), device_id(device_id), move(false), own(true) {
-        FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock();
-        res->mutex.lock();
-    }
-
-    ~ResScope() {
-        if (own) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->unlock();
-        if (move) FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource);
-        resource->mutex.unlock();
-    }
-
- private:
-    ResPtr resource;
-    int64_t device_id;
-    bool move = true;
-    bool own = false;
-};
-
 class GPUIndex {
 public:
    explicit GPUIndex(const int &device_id) : gpu_id_(device_id) {}
-    GPUIndex(const int& device_id, ResPtr resource): gpu_id_(device_id), res_(std::move(resource)){}
+    GPUIndex(const int& device_id, const ResPtr& resource): gpu_id_(device_id), res_(resource){}

    virtual VectorIndexPtr CopyGpuToCpu(const Config &config) = 0;
    virtual VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) = 0;
@@ -139,7 +39,7 @@ class GPUIndex {

 protected:
    int64_t gpu_id_;
-    ResPtr res_ = nullptr;
+    ResWPtr res_;
 };

 class GPUIVF : public IVF, public GPUIndex {

--- a/cpp/src/core/knowhere/knowhere/index/vector_index/idmap.cpp
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/idmap.cpp
@@ -224,9 +224,9 @@ void GPUIDMAP::LoadImpl(const BinarySet &index_binary) {

        if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_) ){
            ResScope rs(res, gpu_id_, false);
-            res_ = res;
            auto device_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index);
            index_.reset(device_index);
+            res_ = res;
        } else {
            KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
        }

--- a/cpp/src/core/test/CMakeLists.txt
+++ b/cpp/src/core/test/CMakeLists.txt
@@ -32,6 +32,7 @@ set(ivf_srcs
        ${CORE_SOURCE_DIR}/knowhere/knowhere/adapter/structure.cpp
        ${CORE_SOURCE_DIR}/knowhere/knowhere/common/exception.cpp
        ${CORE_SOURCE_DIR}/knowhere/knowhere/common/timer.cpp
+        ${CORE_SOURCE_DIR}/knowhere/knowhere/index/vector_index/FaissGpuResourceMgr.cpp
        utils.cpp
        )
 if(NOT TARGET test_ivf)
@@ -48,6 +49,7 @@ set(idmap_srcs
        ${CORE_SOURCE_DIR}/knowhere/knowhere/adapter/structure.cpp
        ${CORE_SOURCE_DIR}/knowhere/knowhere/common/exception.cpp
        ${CORE_SOURCE_DIR}/knowhere/knowhere/common/timer.cpp
+        ${CORE_SOURCE_DIR}/knowhere/knowhere/index/vector_index/FaissGpuResourceMgr.cpp
        utils.cpp
        )
 if(NOT TARGET test_idmap)

--- a/cpp/src/wrapper/KnowhereResource.cpp
+++ b/cpp/src/wrapper/KnowhereResource.cpp
@@ -17,6 +17,7 @@


 #include "KnowhereResource.h"
+#include "knowhere/index/vector_index/FaissGpuResourceMgr.h"
 #include "server/ServerConfig.h"

 #include <map>

--- a/cpp/src/wrapper/KnowhereResource.h
+++ b/cpp/src/wrapper/KnowhereResource.h
@@ -19,7 +19,6 @@
 #pragma once

 #include "utils/Error.h"
-#include "knowhere/index/vector_index/gpu_ivf.h"

 namespace zilliz {
 namespace milvus {