提交 10ee4d90 编写于 作者: G groot 提交者: JinHai-CN

#2366 Reduce SQL execution times (#2383)

* #2366
Signed-off-by: yhmo <yihua.mo@zilliz.com>

* fix ut
Signed-off-by: yhmo <yihua.mo@zilliz.com>

* fix python test
Signed-off-by: yhmo <yihua.mo@zilliz.com>

* fix ut
Signed-off-by: groot <yihua.mo@zilliz.com>

* changelog
Signed-off-by: groot <yihua.mo@zilliz.com>
上级 dfca26d3
......@@ -4,6 +4,7 @@ Please mark all change in change log and use the issue from GitHub
# Milvus 0.9.1 (TBD)
## Bug
- \#2366 Reduce SQL execution times for collections that contain a lot of partitions
- \#2378 Duplicate data after server restart
- \#2399 The nlist set by the user may not take effect
- \#2403 MySQL max_idle_time is 10 by default
......
......@@ -393,6 +393,7 @@ DBImpl::PreloadCollection(const std::string& collection_id) {
// step 1: get all collection files from parent collection
meta::FilesHolder files_holder;
#if 0
auto status = meta_ptr_->FilesToSearch(collection_id, files_holder);
if (!status.ok()) {
return status;
......@@ -404,6 +405,25 @@ DBImpl::PreloadCollection(const std::string& collection_id) {
for (auto& schema : partition_array) {
status = meta_ptr_->FilesToSearch(schema.collection_id_, files_holder);
}
#else
auto status = meta_ptr_->FilesToSearch(collection_id, files_holder);
if (!status.ok()) {
return status;
}
std::vector<meta::CollectionSchema> partition_array;
status = meta_ptr_->ShowPartitions(collection_id, partition_array);
std::set<std::string> partition_ids;
for (auto& schema : partition_array) {
partition_ids.insert(schema.collection_id_);
}
status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder);
if (!status.ok()) {
return status;
}
#endif
int64_t size = 0;
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
......@@ -1642,6 +1662,7 @@ DBImpl::Query(const std::shared_ptr<server::Context>& context, const std::string
Status status;
meta::FilesHolder files_holder;
if (partition_tags.empty()) {
#if 0
// no partition tag specified, means search in whole collection
// get all collection files from parent collection
status = meta_ptr_->FilesToSearch(collection_id, files_holder);
......@@ -1654,11 +1675,33 @@ DBImpl::Query(const std::shared_ptr<server::Context>& context, const std::string
for (auto& schema : partition_array) {
status = meta_ptr_->FilesToSearch(schema.collection_id_, files_holder);
}
#else
// no partition tag specified, means search in whole collection
// get files from root collection
status = meta_ptr_->FilesToSearch(collection_id, files_holder);
if (!status.ok()) {
return status;
}
// get files from partitions
std::set<std::string> partition_ids;
std::vector<meta::CollectionSchema> partition_array;
status = meta_ptr_->ShowPartitions(collection_id, partition_array);
for (auto& id : partition_array) {
partition_ids.insert(id.collection_id_);
}
status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder);
if (!status.ok()) {
return status;
}
#endif
if (files_holder.HoldFiles().empty()) {
return Status::OK(); // no files to search
}
} else {
#if 0
// get files from specified partitions
std::set<std::string> partition_name_array;
status = GetPartitionsByTags(collection_id, partition_tags, partition_name_array);
......@@ -1669,7 +1712,20 @@ DBImpl::Query(const std::shared_ptr<server::Context>& context, const std::string
for (auto& partition_name : partition_name_array) {
status = meta_ptr_->FilesToSearch(partition_name, files_holder);
}
#else
std::set<std::string> partition_name_array;
status = GetPartitionsByTags(collection_id, partition_tags, partition_name_array);
if (!status.ok()) {
return status; // didn't match any partition.
}
std::set<std::string> partition_ids;
for (auto& partition_name : partition_name_array) {
partition_ids.insert(partition_name);
}
status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder);
#endif
if (files_holder.HoldFiles().empty()) {
return Status::OK(); // no files to search
}
......
......@@ -28,17 +28,20 @@ struct {
Status
MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) {
meta::SegmentsSchema sort_files;
meta::SegmentsSchema sort_files, ignore_files;
meta::SegmentsSchema& files = files_holder.HoldFiles();
for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) {
meta::SegmentSchema& file = *iter;
if (file.index_file_size_ > 0 && (int64_t)file.file_size_ > file.index_file_size_) {
// file that no need to merge
ignore_files.push_back(file);
continue;
}
sort_files.push_back(file);
}
files_holder.UnmarkFiles(ignore_files);
// no need to merge single file
if (sort_files.size() < 2) {
return Status::OK();
......
......@@ -1714,7 +1714,7 @@ MySQLMetaImpl::FilesToSearchEx(const std::string& root_collection, const std::se
// distribute id array to batches
const int64_t batch_size = 50;
std::vector<std::vector<std::string>> id_groups;
std::vector<std::string> temp_group = {root_collection};
std::vector<std::string> temp_group;
int64_t count = 1;
for (auto& id : partition_id_array) {
temp_group.push_back(id);
......@@ -1739,6 +1739,8 @@ MySQLMetaImpl::FilesToSearchEx(const std::string& root_collection, const std::se
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
bool is_null_connection = (connectionPtr == nullptr);
fiu_do_on("MySQLMetaImpl.FilesToSearch.null_connection", is_null_connection = true);
fiu_do_on("MySQLMetaImpl.FilesToSearch.throw_exception", throw std::exception(););
if (is_null_connection) {
return Status(DB_ERROR, "Failed to connect to meta server(mysql)");
}
......
......@@ -1135,6 +1135,7 @@ SqliteMetaImpl::FilesToSearchEx(const std::string& root_collection,
FilesHolder& files_holder) {
try {
server::MetricCollector metric;
fiu_do_on("SqliteMetaImpl.FilesToSearch.throw_exception", throw std::exception());
// get root collection information
CollectionSchema collection_schema;
......@@ -1147,7 +1148,7 @@ SqliteMetaImpl::FilesToSearchEx(const std::string& root_collection,
// distribute id array to batches
const int64_t batch_size = 50;
std::vector<std::vector<std::string>> id_groups;
std::vector<std::string> temp_group = {root_collection};
std::vector<std::string> temp_group;
int64_t count = 1;
for (auto& id : partition_id_array) {
temp_group.push_back(id);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册