未验证 提交 531fca88 编写于 作者: C Cai Yudong 提交者: GitHub

Caiyd 1655 gpu ivfflat delete (#1809)

* support GPUIVF delete
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* calc bitset offset correctly
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* support GPUSQ8 delete
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* support GPUIVFPQ delete
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update unittest for GPU delete
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* delete FAISS compile option -DFAISS_USE_FLOAT16
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* code opt
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

* update changelog
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
上级 c0919292
......@@ -8,6 +8,7 @@ Please mark all change in change log and use the issue from GitHub
- \#1762 Server is not forbidden to create new partition which tag is "_default"
## Feature
- \#1655 GPU index support delete vectors
## Improvement
......
......@@ -145,7 +145,7 @@ GPUIVF::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int
if (device_index) {
device_index->nprobe = config[IndexParams::nprobe];
ResScope rs(res_, gpu_id_);
device_index->search(n, (float*)data, k, distances, labels);
device_index->search(n, (float*)data, k, distances, labels, bitset_);
} else {
KNOWHERE_THROW_MSG("Not a GpuIndexIVF type.");
}
......
......@@ -366,9 +366,15 @@ GpuIndexIVFPQ::searchImpl_(int n,
static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
Tensor<long, 2, true> outLabels(const_cast<long*>(labels), {n, k});
auto bitsetDevice = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
index_->query(queries, bitsetDevice, nprobe, k, outDistances, outLabels);
if (!bitset) {
auto bitsetDevice = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
index_->query(queries, bitsetDevice, nprobe, k, outDistances, outLabels);
} else {
auto bitsetDevice = toDevice<uint8_t, 1>(resources_, device_,
const_cast<uint8_t*>(bitset->data()), stream,
{(int) bitset->size()});
index_->query(queries, bitsetDevice, nprobe, k, outDistances, outLabels);
}
}
int
......
......@@ -275,9 +275,15 @@ GpuIndexIVFScalarQuantizer::searchImpl_(int n,
static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
Tensor<long, 2, true> outLabels(const_cast<long*>(labels), {n, k});
auto bitsetDevice = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
index_->query(queries, bitsetDevice, nprobe, k, outDistances, outLabels);
if (!bitset) {
auto bitsetDevice = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
index_->query(queries, bitsetDevice, nprobe, k, outDistances, outLabels);
} else {
auto bitsetDevice = toDevice<uint8_t, 1>(resources_, device_,
const_cast<uint8_t*>(bitset->data()), stream,
{(int) bitset->size()});
index_->query(queries, bitsetDevice, nprobe, k, outDistances, outLabels);
}
}
} } // namespace
......@@ -164,8 +164,6 @@ IVFFlat::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
auto& mem = resources_->getMemoryManagerCurrentDevice();
auto stream = resources_->getDefaultStreamCurrentDevice();
DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream);
// Number of valid vectors that we actually add; we return this
int numAdded = 0;
......@@ -176,6 +174,8 @@ IVFFlat::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
listIds2d(mem, {vecs.getSize(0), 1}, stream);
auto listIds = listIds2d.view<1>({vecs.getSize(0)});
/* pseudo bitset */
DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream);
quantizer_->query(vecs, bitset, 1, listDistance2d, listIds2d, false);
// Calculate residuals for these vectors, if needed
......@@ -354,8 +354,9 @@ IVFFlat::query(Tensor<float, 2, true>& queries,
// Find the `nprobe` closest lists; we can use int indices both
// internally and externally
DeviceTensor<uint8_t, 1, true> coarseBitset(mem, {0}, stream);
quantizer_->query(queries,
bitset,
coarseBitset,
nprobe,
coarseDistances,
coarseIndices,
......@@ -371,6 +372,7 @@ IVFFlat::query(Tensor<float, 2, true>& queries,
runIVFFlatScan(queries,
coarseIndices,
bitset,
deviceListDataPointers_,
deviceListIndexPointers_,
indicesOptions_,
......
......@@ -160,6 +160,7 @@ ivfFlatScan(Tensor<float, 2, true> queries,
void
runIVFFlatScanTile(Tensor<float, 2, true>& queries,
Tensor<int, 2, true>& listIds,
Tensor<uint8_t, 1, true>& bitset,
thrust::device_vector<void*>& listData,
thrust::device_vector<void*>& listIndices,
IndicesOptions indicesOptions,
......@@ -315,7 +316,11 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries,
#undef RUN_IVF_FLAT
// k-select the output in chunks, to increase parallelism
runPass1SelectLists(prefixSumOffsets,
runPass1SelectLists(listIndices,
indicesOptions,
prefixSumOffsets,
listIds,
bitset,
allDistances,
listIds.getSize(1),
k,
......@@ -344,6 +349,7 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries,
void
runIVFFlatScan(Tensor<float, 2, true>& queries,
Tensor<int, 2, true>& listIds,
Tensor<uint8_t, 1, true>& bitset,
thrust::device_vector<void*>& listData,
thrust::device_vector<void*>& listIndices,
IndicesOptions indicesOptions,
......@@ -489,6 +495,7 @@ runIVFFlatScan(Tensor<float, 2, true>& queries,
runIVFFlatScanTile(queryView,
listIdsView,
bitset,
listData,
listIndices,
indicesOptions,
......
......@@ -20,6 +20,7 @@ class GpuResources;
void runIVFFlatScan(Tensor<float, 2, true>& queries,
Tensor<int, 2, true>& listIds,
Tensor<uint8_t, 1, true>& bitset,
thrust::device_vector<void*>& listData,
thrust::device_vector<void*>& listIndices,
IndicesOptions indicesOptions,
......
......@@ -119,8 +119,6 @@ IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
auto& mem = resources_->getMemoryManagerCurrentDevice();
auto stream = resources_->getDefaultStreamCurrentDevice();
DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream);
// Number of valid vectors that we actually add; we return this
int numAdded = 0;
......@@ -130,6 +128,8 @@ IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
DeviceTensor<int, 2, true> listIds2d(mem, {vecs.getSize(0), 1}, stream);
auto listIds = listIds2d.view<1>({vecs.getSize(0)});
/* pseudo bitset */
DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream);
quantizer_->query(vecs, bitset, 1, listDistance, listIds2d, false);
// Copy the lists that we wish to append to back to the CPU
......@@ -532,10 +532,11 @@ IVFPQ::query(Tensor<float, 2, true>& queries,
DeviceTensor<int, 2, true>
coarseIndices(mem, {queries.getSize(0), nprobe}, stream);
DeviceTensor<uint8_t, 1, true> coarseBitset(mem, {0}, stream);
// Find the `nprobe` closest coarse centroids; we can use int
// indices both internally and externally
quantizer_->query(queries,
bitset,
coarseBitset,
nprobe,
coarseDistances,
coarseIndices,
......@@ -543,6 +544,7 @@ IVFPQ::query(Tensor<float, 2, true>& queries,
if (precomputedCodes_) {
runPQPrecomputedCodes_(queries,
bitset,
coarseDistances,
coarseIndices,
k,
......@@ -550,6 +552,7 @@ IVFPQ::query(Tensor<float, 2, true>& queries,
outIndices);
} else {
runPQNoPrecomputedCodes_(queries,
bitset,
coarseDistances,
coarseIndices,
k,
......@@ -592,6 +595,7 @@ IVFPQ::getPQCentroids() {
void
IVFPQ::runPQPrecomputedCodes_(
Tensor<float, 2, true>& queries,
Tensor<uint8_t, 1, true>& bitset,
DeviceTensor<float, 2, true>& coarseDistances,
DeviceTensor<int, 2, true>& coarseIndices,
int k,
......@@ -655,6 +659,7 @@ IVFPQ::runPQPrecomputedCodes_(
term2, // term 2
term3, // term 3
coarseIndices,
bitset,
useFloat16LookupTables_,
bytesPerVector_,
numSubQuantizers_,
......@@ -673,6 +678,7 @@ IVFPQ::runPQPrecomputedCodes_(
void
IVFPQ::runPQNoPrecomputedCodes_(
Tensor<float, 2, true>& queries,
Tensor<uint8_t, 1, true>& bitset,
DeviceTensor<float, 2, true>& coarseDistances,
DeviceTensor<int, 2, true>& coarseIndices,
int k,
......@@ -685,6 +691,7 @@ IVFPQ::runPQNoPrecomputedCodes_(
coarseCentroids,
pqCentroidsInnermostCode_,
coarseIndices,
bitset,
useFloat16LookupTables_,
bytesPerVector_,
numSubQuantizers_,
......
......@@ -83,6 +83,7 @@ class IVFPQ : public IVFBase {
/// Runs kernels for scanning inverted lists with precomputed codes
void runPQPrecomputedCodes_(Tensor<float, 2, true>& queries,
Tensor<uint8_t, 1, true>& bitset,
DeviceTensor<float, 2, true>& coarseDistances,
DeviceTensor<int, 2, true>& coarseIndices,
int k,
......@@ -91,6 +92,7 @@ class IVFPQ : public IVFBase {
/// Runs kernels for scanning inverted lists without precomputed codes
void runPQNoPrecomputedCodes_(Tensor<float, 2, true>& queries,
Tensor<uint8_t, 1, true>& bitset,
DeviceTensor<float, 2, true>& coarseDistances,
DeviceTensor<int, 2, true>& coarseIndices,
int k,
......
......@@ -16,6 +16,70 @@
// post-processing and k-selecting the results
namespace faiss { namespace gpu {
// This is warp divergence central, but this is really a final step
// and happening a small number of times
inline __device__ int binarySearchForBucket(int* prefixSumOffsets,
                                            int size,
                                            int val) {
  // Lower-bound style binary search over a prefix-sum array: returns the
  // index of the first bucket whose prefix sum is strictly greater than
  // `val`, i.e. the bucket that contains offset `val`.
  int lo = 0;
  int hi = size;

  // Shrink the half-open range [lo, hi) until it is empty
  while (lo < hi) {
    int mid = lo + (hi - lo) / 2;

    if (prefixSumOffsets[mid] <= val) {
      // `val` lies at or past this bucket's end; search the upper half
      lo = mid + 1;
    } else {
      // This bucket ends after `val`; it may be the answer
      hi = mid;
    }
  }

  // We must find the bucket that it is in
  assert(lo != size);

  return lo;
}
inline __device__ long
getListIndex(int queryId,
             int offset,
             void** listIndices,
             Tensor<int, 2, true>& prefixSumOffsets,
             Tensor<int, 2, true>& topQueryToCentroid,
             IndicesOptions opt) {
  // Maps a flattened per-query result `offset` back to the user-visible
  // vector index, using the prefix-sum layout produced by the scan pass.
  long index = -1;

  // First recover which probe (scanned inverted list) this offset fell
  // into, by binary search over this query's prefix-sum row.
  int probe = binarySearchForBucket(prefixSumOffsets[queryId].data(),
                                    prefixSumOffsets.getSize(1),
                                    offset);

  // The probe selects the actual list ID for this query
  int listId = topQueryToCentroid[queryId][probe];

  // Offset within that list; the element just before the row start is
  // guaranteed to hold 0, so reading at position -1 is safe
  int listStart = *(prefixSumOffsets[queryId][probe].data() - 1);
  int listOffset = offset - listStart;

  // Translate (listId, listOffset) into the stored user index,
  // depending on how indices are kept on the GPU
  if (opt == INDICES_32_BIT) {
    index = (long) ((int*) listIndices[listId])[listOffset];
  } else if (opt == INDICES_64_BIT) {
    index = ((long*) listIndices[listId])[listOffset];
  } else {
    // No stored indices: pack (listId, listOffset) into one 64-bit value
    index = ((long) listId << 32 | (long) listOffset);
  }

  return index;
}
/// Function for multi-pass scanning that collects the length of
/// intermediate results for all (query, probe) pair
void runCalcListOffsets(Tensor<int, 2, true>& topQueryToCentroid,
......@@ -25,7 +89,11 @@ void runCalcListOffsets(Tensor<int, 2, true>& topQueryToCentroid,
cudaStream_t stream);
/// Performs a first pass of k-selection on the results
void runPass1SelectLists(Tensor<int, 2, true>& prefixSumOffsets,
void runPass1SelectLists(thrust::device_vector<void*>& listIndices,
IndicesOptions indicesOptions,
Tensor<int, 2, true>& prefixSumOffsets,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
Tensor<float, 1, true>& distance,
int nprobe,
int k,
......
......@@ -23,10 +23,14 @@ namespace faiss { namespace gpu {
template <int ThreadsPerBlock, int NumWarpQ, int NumThreadQ, bool Dir>
__global__ void
pass1SelectLists(Tensor<int, 2, true> prefixSumOffsets,
pass1SelectLists(void** listIndices,
Tensor<int, 2, true> prefixSumOffsets,
Tensor<int, 2, true> topQueryToCentroid,
Tensor<uint8_t, 1, true> bitset,
Tensor<float, 1, true> distance,
int nprobe,
int k,
IndicesOptions opt,
Tensor<float, 3, true> heapDistances,
Tensor<int, 3, true> heapIndices) {
constexpr int kNumWarps = ThreadsPerBlock / kWarpSize;
......@@ -58,16 +62,38 @@ pass1SelectLists(Tensor<int, 2, true> prefixSumOffsets,
int i = threadIdx.x;
auto distanceStart = distance[start].data();
bool bitsetEmpty = (bitset.getSize(0) == 0);
long index = -1;
// BlockSelect add cannot be used in a warp divergent circumstance; we
// handle the remainder warp below
for (; i < limit; i += blockDim.x) {
heap.add(distanceStart[i], start + i);
index = getListIndex(queryId,
start + i,
listIndices,
prefixSumOffsets,
topQueryToCentroid,
opt);
if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) {
heap.add(distanceStart[i], start + i);
} else {
heap.add((1.0 / 0.0), start + i);
}
}
// Handle warp divergence separately
if (i < num) {
heap.addThreadQ(distanceStart[i], start + i);
index = getListIndex(queryId,
start + i,
listIndices,
prefixSumOffsets,
topQueryToCentroid,
opt);
if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) {
heap.addThreadQ(distanceStart[i], start + i);
} else {
heap.addThreadQ((1.0 / 0.0), start + i);
}
}
// Merge all final results
......@@ -82,7 +108,11 @@ pass1SelectLists(Tensor<int, 2, true> prefixSumOffsets,
}
void
runPass1SelectLists(Tensor<int, 2, true>& prefixSumOffsets,
runPass1SelectLists(thrust::device_vector<void*>& listIndices,
IndicesOptions indicesOptions,
Tensor<int, 2, true>& prefixSumOffsets,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
Tensor<float, 1, true>& distance,
int nprobe,
int k,
......@@ -98,10 +128,14 @@ runPass1SelectLists(Tensor<int, 2, true>& prefixSumOffsets,
#define RUN_PASS(BLOCK, NUM_WARP_Q, NUM_THREAD_Q, DIR) \
do { \
pass1SelectLists<BLOCK, NUM_WARP_Q, NUM_THREAD_Q, DIR> \
<<<grid, BLOCK, 0, stream>>>(prefixSumOffsets, \
<<<grid, BLOCK, 0, stream>>>(listIndices.data().get(), \
prefixSumOffsets, \
topQueryToCentroid, \
bitset, \
distance, \
nprobe, \
k, \
indicesOptions, \
heapDistances, \
heapIndices); \
CUDA_TEST_ERROR(); \
......
......@@ -23,30 +23,30 @@ namespace faiss { namespace gpu {
// This is warp divergence central, but this is really a final step
// and happening a small number of times
inline __device__ int binarySearchForBucket(int* prefixSumOffsets,
int size,
int val) {
int start = 0;
int end = size;
while (end - start > 0) {
int mid = start + (end - start) / 2;
int midVal = prefixSumOffsets[mid];
// Find the first bucket that we are <=
if (midVal <= val) {
start = mid + 1;
} else {
end = mid;
}
}
// We must find the bucket that it is in
assert(start != size);
return start;
}
//inline __device__ int binarySearchForBucket(int* prefixSumOffsets,
// int size,
// int val) {
// int start = 0;
// int end = size;
//
// while (end - start > 0) {
// int mid = start + (end - start) / 2;
//
// int midVal = prefixSumOffsets[mid];
//
// // Find the first bucket that we are <=
// if (midVal <= val) {
// start = mid + 1;
// } else {
// end = mid;
// }
// }
//
// // We must find the bucket that it is in
// assert(start != size);
//
// return start;
//}
template <int ThreadsPerBlock,
int NumWarpQ,
......@@ -113,30 +113,12 @@ pass2SelectLists(Tensor<float, 2, true> heapDistances,
// calculated by the original scan.
int offset = heapIndices[queryId][v];
// In order to determine the actual user index, we need to first
// determine what list it was in.
// We do this by binary search in the prefix sum list.
int probe = binarySearchForBucket(prefixSumOffsets[queryId].data(),
prefixSumOffsets.getSize(1),
offset);
// This is then the probe for the query; we can find the actual
// list ID from this
int listId = topQueryToCentroid[queryId][probe];
// Now, we need to know the offset within the list
// We ensure that before the array (at offset -1), there is a 0 value
int listStart = *(prefixSumOffsets[queryId][probe].data() - 1);
int listOffset = offset - listStart;
// This gives us our final index
if (opt == INDICES_32_BIT) {
index = (long) ((int*) listIndices[listId])[listOffset];
} else if (opt == INDICES_64_BIT) {
index = ((long*) listIndices[listId])[listOffset];
} else {
index = ((long) listId << 32 | (long) listOffset);
}
index = getListIndex(queryId,
offset,
listIndices,
prefixSumOffsets,
topQueryToCentroid,
opt);
}
outIndices[queryId][i] = index;
......
......@@ -224,6 +224,7 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
Tensor<float, 3, true>& pqCentroidsInnermostCode,
NoTypeTensor<4, true>& codeDistances,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
bool useFloat16Lookup,
int bytesPerCode,
int numSubQuantizers,
......@@ -357,7 +358,11 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
CUDA_TEST_ERROR();
// k-select the output in chunks, to increase parallelism
runPass1SelectLists(prefixSumOffsets,
runPass1SelectLists(listIndices,
indicesOptions,
prefixSumOffsets,
topQueryToCentroid,
bitset,
allDistances,
topQueryToCentroid.getSize(1),
k,
......@@ -387,6 +392,7 @@ void runPQScanMultiPassNoPrecomputed(Tensor<float, 2, true>& queries,
Tensor<float, 2, true>& centroids,
Tensor<float, 3, true>& pqCentroidsInnermostCode,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
bool useFloat16Lookup,
int bytesPerCode,
int numSubQuantizers,
......@@ -560,6 +566,7 @@ void runPQScanMultiPassNoPrecomputed(Tensor<float, 2, true>& queries,
pqCentroidsInnermostCode,
codeDistancesView,
coarseIndicesView,
bitset,
useFloat16Lookup,
bytesPerCode,
numSubQuantizers,
......
......@@ -24,6 +24,7 @@ void runPQScanMultiPassNoPrecomputed(Tensor<float, 2, true>& queries,
Tensor<float, 2, true>& centroids,
Tensor<float, 3, true>& pqCentroidsInnermostCode,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
bool useFloat16Lookup,
int bytesPerCode,
int numSubQuantizers,
......
......@@ -219,6 +219,7 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
NoTypeTensor<3, true>& precompTerm2,
NoTypeTensor<3, true>& precompTerm3,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
bool useFloat16Lookup,
int bytesPerCode,
int numSubQuantizers,
......@@ -344,7 +345,11 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
}
// k-select the output in chunks, to increase parallelism
runPass1SelectLists(prefixSumOffsets,
runPass1SelectLists(listIndices,
indicesOptions,
prefixSumOffsets,
topQueryToCentroid,
bitset,
allDistances,
topQueryToCentroid.getSize(1),
k,
......@@ -377,6 +382,7 @@ void runPQScanMultiPassPrecomputed(Tensor<float, 2, true>& queries,
NoTypeTensor<3, true>& precompTerm2,
NoTypeTensor<3, true>& precompTerm3,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
bool useFloat16Lookup,
int bytesPerCode,
int numSubQuantizers,
......@@ -527,6 +533,7 @@ void runPQScanMultiPassPrecomputed(Tensor<float, 2, true>& queries,
precompTerm2,
term3View,
coarseIndicesView,
bitset,
useFloat16Lookup,
bytesPerCode,
numSubQuantizers,
......
......@@ -22,6 +22,7 @@ void runPQScanMultiPassPrecomputed(Tensor<float, 2, true>& queries,
NoTypeTensor<3, true>& precompTerm2,
NoTypeTensor<3, true>& precompTerm3,
Tensor<int, 2, true>& topQueryToCentroid,
Tensor<uint8_t, 1, true>& bitset,
bool useFloat16Lookup,
int bytesPerCode,
int numSubQuantizers,
......
......@@ -22,7 +22,7 @@ NVCCFLAGS = -I $(CUDA_ROOT)/targets/x86_64-linux/include/ \
-Xcudafe --diag_suppress=unrecognized_attribute \
$(CUDA_ARCH) \
-lineinfo \
-ccbin $(CXX) -DFAISS_USE_FLOAT16
-ccbin $(CXX)
OS = $(shell uname -s)
......
......@@ -90,9 +90,13 @@ INSTANTIATE_TEST_CASE_P(
std::make_tuple(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, milvus::knowhere::IndexMode::MODE_CPU),
std::make_tuple(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, milvus::knowhere::IndexMode::MODE_CPU)));
TEST_P(IVFTest, ivf_basic) {
TEST_P(IVFTest, ivf_basic_cpu) {
assert(!xb.empty());
if (index_mode_ != milvus::knowhere::IndexMode::MODE_CPU) {
return;
}
// null faiss index
ASSERT_ANY_THROW(index_->Add(base_dataset, conf_));
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf_));
......@@ -103,11 +107,10 @@ TEST_P(IVFTest, ivf_basic) {
EXPECT_EQ(index_->Dim(), dim);
auto result = index_->Query(query_dataset, conf_);
AssertAnns(result, nq, conf_[milvus::knowhere::meta::TOPK]);
AssertAnns(result, nq, k);
// PrintResult(result, nq, k);
if (index_mode_ == milvus::knowhere::IndexMode::MODE_CPU &&
index_type_ != milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
if (index_type_ != milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
auto result2 = index_->QueryById(id_dataset, conf_);
AssertAnns(result2, nq, k);
......@@ -143,6 +146,43 @@ TEST_P(IVFTest, ivf_basic) {
#endif
}
TEST_P(IVFTest, ivf_basic_gpu) {
assert(!xb.empty());
if (index_mode_ != milvus::knowhere::IndexMode::MODE_GPU) {
return;
}
// null faiss index
ASSERT_ANY_THROW(index_->Add(base_dataset, conf_));
ASSERT_ANY_THROW(index_->AddWithoutIds(base_dataset, conf_));
index_->Train(base_dataset, conf_);
index_->Add(base_dataset, conf_);
EXPECT_EQ(index_->Count(), nb);
EXPECT_EQ(index_->Dim(), dim);
auto result = index_->Query(query_dataset, conf_);
AssertAnns(result, nq, k);
// PrintResult(result, nq, k);
if (index_type_ != milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8H) {
faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(nb);
for (int64_t i = 0; i < nq; ++i) {
concurrent_bitset_ptr->set(i);
}
index_->SetBlacklist(concurrent_bitset_ptr);
auto result_bs_1 = index_->Query(query_dataset, conf_);
AssertAnns(result_bs_1, nq, k, CheckMode::CHECK_NOT_EQUAL);
// PrintResult(result, nq, k);
}
#ifdef MILVUS_GPU_VERSION
milvus::knowhere::FaissGpuResourceMgr::GetInstance().Dump();
#endif
}
TEST_P(IVFTest, ivf_serialize) {
fiu_init(0);
auto serialize = [](const std::string& filename, milvus::knowhere::BinaryPtr& bin, uint8_t* ret) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册