Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
54d17bc5
M
milvus
项目概览
milvus
/
milvus
9 个月 前同步成功
通知
260
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
54d17bc5
编写于
7月 13, 2022
作者:
X
xige-16
提交者:
GitHub
7月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix query too slow when insert multi repeated pk data (#18231)
Signed-off-by:
N
xige-16
<
xi.ge@zilliz.com
>
上级
e5fe4612
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
76 additions
and
40 deletions
+76
-40
internal/core/src/common/Types.h
internal/core/src/common/Types.h
+4
-1
internal/core/src/segcore/DeletedRecord.h
internal/core/src/segcore/DeletedRecord.h
+15
-1
internal/core/src/segcore/InsertRecord.h
internal/core/src/segcore/InsertRecord.h
+13
-11
internal/core/src/segcore/SegmentGrowingImpl.cpp
internal/core/src/segcore/SegmentGrowingImpl.cpp
+4
-5
internal/core/src/segcore/SegmentGrowingImpl.h
internal/core/src/segcore/SegmentGrowingImpl.h
+5
-0
internal/core/src/segcore/SegmentInterface.h
internal/core/src/segcore/SegmentInterface.h
+3
-0
internal/core/src/segcore/SegmentSealedImpl.cpp
internal/core/src/segcore/SegmentSealedImpl.cpp
+4
-5
internal/core/src/segcore/SegmentSealedImpl.h
internal/core/src/segcore/SegmentSealedImpl.h
+5
-0
internal/core/src/segcore/Utils.cpp
internal/core/src/segcore/Utils.cpp
+22
-16
internal/querynode/task_query.go
internal/querynode/task_query.go
+1
-1
未找到文件。
internal/core/src/common/Types.h
浏览文件 @
54d17bc5
...
...
@@ -22,6 +22,7 @@
#include <utility>
#include <vector>
#include <tbb/concurrent_unordered_map.h>
#include <tbb/concurrent_unordered_set.h>
#include <boost/align/aligned_allocator.hpp>
#include <boost/container/vector.hpp>
#include <boost/dynamic_bitset.hpp>
...
...
@@ -70,7 +71,9 @@ using VectorArray = proto::schema::VectorField;
using
IdArray
=
proto
::
schema
::
IDs
;
using
InsertData
=
proto
::
segcore
::
InsertRecord
;
using
PkType
=
std
::
variant
<
std
::
monostate
,
int64_t
,
std
::
string
>
;
using
Pk2OffsetType
=
tbb
::
concurrent_unordered_multimap
<
PkType
,
int64_t
,
std
::
hash
<
PkType
>>
;
// tbb::concurrent_unordered_multimap::equal_range is too slow when the same key is repeated many times
// using Pk2OffsetType = tbb::concurrent_unordered_multimap<PkType, int64_t, std::hash<PkType>>;
using
Pk2OffsetType
=
tbb
::
concurrent_unordered_map
<
PkType
,
tbb
::
concurrent_unordered_set
<
int64_t
>
,
std
::
hash
<
PkType
>>
;
inline
bool
IsPrimaryKeyDataType
(
DataType
data_type
)
{
...
...
internal/core/src/segcore/DeletedRecord.h
浏览文件 @
54d17bc5
...
...
@@ -42,6 +42,21 @@ struct DeletedRecord {
return
lru_
;
}
std
::
shared_ptr
<
TmpBitmap
>
clone_lru_entry
(
int64_t
insert_barrier
,
int64_t
del_barrier
,
int64_t
&
old_del_barrier
,
bool
&
hit_cache
)
{
std
::
shared_lock
lck
(
shared_mutex_
);
auto
res
=
lru_
->
clone
(
insert_barrier
);
old_del_barrier
=
lru_
->
del_barrier
;
if
(
lru_
->
bitmap_ptr
->
size
()
==
insert_barrier
&&
lru_
->
del_barrier
==
del_barrier
)
{
hit_cache
=
true
;
}
else
{
res
->
del_barrier
=
del_barrier
;
}
return
res
;
}
void
insert_lru_entry
(
std
::
shared_ptr
<
TmpBitmap
>
new_entry
,
bool
force
=
false
)
{
std
::
lock_guard
lck
(
shared_mutex_
);
...
...
@@ -59,7 +74,6 @@ struct DeletedRecord {
AckResponder
ack_responder_
;
ConcurrentVector
<
Timestamp
>
timestamps_
;
ConcurrentVector
<
PkType
>
pks_
;
int64_t
record_size_
=
0
;
private:
std
::
shared_ptr
<
TmpBitmap
>
lru_
;
...
...
internal/core/src/segcore/InsertRecord.h
浏览文件 @
54d17bc5
...
...
@@ -43,11 +43,12 @@ struct InsertRecord {
std
::
vector
<
SegOffset
>
search_pk
(
const
PkType
pk
,
Timestamp
timestamp
)
const
{
std
::
vector
<
SegOffset
>
res_offsets
;
auto
[
iter_b
,
iter_e
]
=
pk2offset_
.
equal_range
(
pk
);
for
(
auto
iter
=
iter_b
;
iter
!=
iter_e
;
++
iter
)
{
auto
offset
=
SegOffset
(
iter
->
second
);
if
(
timestamps_
[
offset
.
get
()]
<=
timestamp
)
{
res_offsets
.
push_back
(
offset
);
auto
offset_iter
=
pk2offset_
.
find
(
pk
);
if
(
offset_iter
!=
pk2offset_
.
end
())
{
for
(
auto
offset
:
offset_iter
->
second
)
{
if
(
timestamps_
[
offset
]
<=
timestamp
)
{
res_offsets
.
push_back
(
SegOffset
(
offset
));
}
}
}
...
...
@@ -57,11 +58,12 @@ struct InsertRecord {
std
::
vector
<
SegOffset
>
search_pk
(
const
PkType
pk
,
int64_t
insert_barrier
)
const
{
std
::
vector
<
SegOffset
>
res_offsets
;
auto
[
iter_b
,
iter_e
]
=
pk2offset_
.
equal_range
(
pk
);
for
(
auto
iter
=
iter_b
;
iter
!=
iter_e
;
++
iter
)
{
auto
offset
=
SegOffset
(
iter
->
second
);
if
(
offset
.
get
()
<
insert_barrier
)
{
res_offsets
.
push_back
(
offset
);
auto
offset_iter
=
pk2offset_
.
find
(
pk
);
if
(
offset_iter
!=
pk2offset_
.
end
())
{
for
(
auto
offset
:
offset_iter
->
second
)
{
if
(
offset
<
insert_barrier
)
{
res_offsets
.
push_back
(
SegOffset
(
offset
));
}
}
}
...
...
@@ -70,7 +72,7 @@ struct InsertRecord {
void
insert_pk
(
const
PkType
pk
,
int64_t
offset
)
{
pk2offset_
.
insert
(
std
::
make_pair
(
pk
,
offset
)
);
pk2offset_
[
pk
].
insert
(
offset
);
}
bool
...
...
internal/core/src/segcore/SegmentGrowingImpl.cpp
浏览文件 @
54d17bc5
...
...
@@ -157,11 +157,10 @@ SegmentGrowingImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
auto
timestamps
=
reinterpret_cast
<
const
Timestamp
*>
(
info
.
timestamps
);
// step 2: fill pks and timestamps
deleted_record_
.
pks_
.
set_data_raw
(
0
,
pks
.
data
(),
size
);
deleted_record_
.
timestamps_
.
set_data_raw
(
0
,
timestamps
,
size
);
deleted_record_
.
ack_responder_
.
AddSegment
(
0
,
size
);
deleted_record_
.
reserved
.
fetch_add
(
size
);
deleted_record_
.
record_size_
=
size
;
auto
reserved_begin
=
deleted_record_
.
reserved
.
fetch_add
(
size
);
deleted_record_
.
pks_
.
set_data_raw
(
reserved_begin
,
pks
.
data
(),
size
);
deleted_record_
.
timestamps_
.
set_data_raw
(
reserved_begin
,
timestamps
,
size
);
deleted_record_
.
ack_responder_
.
AddSegment
(
reserved_begin
,
reserved_begin
+
size
);
}
SpanBase
...
...
internal/core/src/segcore/SegmentGrowingImpl.h
浏览文件 @
54d17bc5
...
...
@@ -64,6 +64,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
std
::
string
debug
()
const
override
;
int64_t
get_segment_id
()
const
override
{
return
id_
;
}
public:
const
InsertRecord
&
get_insert_record
()
const
{
...
...
internal/core/src/segcore/SegmentInterface.h
浏览文件 @
54d17bc5
...
...
@@ -69,6 +69,9 @@ class SegmentInterface {
virtual
void
LoadDeletedRecord
(
const
LoadDeletedRecordInfo
&
info
)
=
0
;
virtual
int64_t
get_segment_id
()
const
=
0
;
};
// internal API for DSL calculation
...
...
internal/core/src/segcore/SegmentSealedImpl.cpp
浏览文件 @
54d17bc5
...
...
@@ -254,11 +254,10 @@ SegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
auto
timestamps
=
reinterpret_cast
<
const
Timestamp
*>
(
info
.
timestamps
);
// step 2: fill pks and timestamps
deleted_record_
.
pks_
.
set_data_raw
(
0
,
pks
.
data
(),
size
);
deleted_record_
.
timestamps_
.
set_data_raw
(
0
,
timestamps
,
size
);
deleted_record_
.
ack_responder_
.
AddSegment
(
0
,
size
);
deleted_record_
.
reserved
.
fetch_add
(
size
);
deleted_record_
.
record_size_
=
size
;
auto
reserved_begin
=
deleted_record_
.
reserved
.
fetch_add
(
size
);
deleted_record_
.
pks_
.
set_data_raw
(
reserved_begin
,
pks
.
data
(),
size
);
deleted_record_
.
timestamps_
.
set_data_raw
(
reserved_begin
,
timestamps
,
size
);
deleted_record_
.
ack_responder_
.
AddSegment
(
reserved_begin
,
reserved_begin
+
size
);
}
// internal API: support scalar index only
...
...
internal/core/src/segcore/SegmentSealedImpl.h
浏览文件 @
54d17bc5
...
...
@@ -50,6 +50,11 @@ class SegmentSealedImpl : public SegmentSealed {
bool
HasFieldData
(
FieldId
field_id
)
const
override
;
int64_t
get_segment_id
()
const
override
{
return
id_
;
}
public:
int64_t
GetMemoryUsageInBytes
()
const
override
;
...
...
internal/core/src/segcore/Utils.cpp
浏览文件 @
54d17bc5
...
...
@@ -380,37 +380,43 @@ get_deleted_bitmap(int64_t del_barrier,
DeletedRecord
&
delete_record
,
const
InsertRecord
&
insert_record
,
Timestamp
query_timestamp
)
{
auto
old
=
delete_record
.
get_lru_entry
();
// if insert_barrier and del_barrier have not changed, use cache data directly
if
(
old
->
bitmap_ptr
->
size
()
==
insert_barrier
)
{
if
(
old
->
del_barrier
==
del_barrier
)
{
return
old
;
}
bool
hit_cache
=
false
;
int64_t
old_del_barrier
=
0
;
auto
current
=
delete_record
.
clone_lru_entry
(
insert_barrier
,
del_barrier
,
old_del_barrier
,
hit_cache
);
if
(
hit_cache
)
{
return
current
;
}
auto
current
=
old
->
clone
(
insert_barrier
);
current
->
del_barrier
=
del_barrier
;
auto
bitmap
=
current
->
bitmap_ptr
;
int64_t
start
,
end
;
if
(
del_barrier
<
old
->
del_barrier
)
{
if
(
del_barrier
<
old
_
del_barrier
)
{
// in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp
// so these deletion records do not take effect in query/search
// so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] will be reset to 0
// for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0]
start
=
del_barrier
;
end
=
old
->
del_barrier
;
end
=
old
_
del_barrier
;
}
else
{
// the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier]
// for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0]
start
=
old
->
del_barrier
;
start
=
old
_
del_barrier
;
end
=
del_barrier
;
}
// Avoid invalid calculations when there are a lot of repeated delete pks
std
::
unordered_map
<
PkType
,
Timestamp
>
delete_timestamps
;
for
(
auto
del_index
=
start
;
del_index
<
end
;
++
del_index
)
{
// get pk in delete logs
auto
pk
=
delete_record
.
pks_
[
del_index
];
// find insert data which has same pk
auto
timestamp
=
delete_record
.
timestamps_
[
del_index
];
delete_timestamps
[
pk
]
=
timestamp
>
delete_timestamps
[
pk
]
?
timestamp
:
delete_timestamps
[
pk
];
}
for
(
auto
iter
=
delete_timestamps
.
begin
();
iter
!=
delete_timestamps
.
end
();
iter
++
)
{
auto
pk
=
iter
->
first
;
auto
delete_timestamp
=
iter
->
second
;
auto
segOffsets
=
insert_record
.
search_pk
(
pk
,
insert_barrier
);
for
(
auto
offset
:
segOffsets
)
{
int64_t
insert_row_offset
=
offset
.
get
();
...
...
@@ -419,22 +425,22 @@ get_deleted_bitmap(int64_t del_barrier,
// insert after delete with same pk, delete will not take effect on this insert record
// and reset bitmap to 0
if
(
insert_record
.
timestamps_
[
insert_row_offset
]
>
delete_
record
.
timestamps_
[
del_index
]
)
{
if
(
insert_record
.
timestamps_
[
insert_row_offset
]
>
delete_
timestamp
)
{
bitmap
->
reset
(
insert_row_offset
);
continue
;
}
// the deletion record does not take effect in search/query
// and reset bitmap to 0
if
(
delete_
record
.
timestamps_
[
del_index
]
>
query_timestamp
)
{
if
(
delete_
timestamp
>
query_timestamp
)
{
bitmap
->
reset
(
insert_row_offset
);
continue
;
}
// insert data corresponding to the insert_row_offset will be ignored in search/query
bitmap
->
set
(
insert_row_offset
);
}
}
delete_record
.
insert_lru_entry
(
current
);
return
current
;
}
...
...
internal/querynode/task_query.go
浏览文件 @
54d17bc5
...
...
@@ -54,7 +54,7 @@ func (q *queryTask) PreExecute(ctx context.Context) error {
func
(
q
*
queryTask
)
queryOnStreaming
()
error
{
// check ctx timeout
if
!
funcutil
.
CheckCtxValid
(
q
.
Ctx
())
{
return
errors
.
New
(
"
search
context timeout"
)
return
errors
.
New
(
"
query
context timeout"
)
}
// check if collection has been released, check streaming since it's released first
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录