From 10ee4d90157973194cde4599869a275cd5f553df Mon Sep 17 00:00:00 2001 From: groot Date: Fri, 22 May 2020 22:13:09 -0500 Subject: [PATCH] #2366 Reduce SQL execution times (#2383) * #2366 Signed-off-by: yhmo * fix ut Signed-off-by: yhmo * fix python test Signed-off-by: yhmo * fix ut Signed-off-by: groot * changelog Signed-off-by: groot --- CHANGELOG.md | 1 + core/src/db/DBImpl.cpp | 56 +++++++++++++++++++++ core/src/db/merge/MergeAdaptiveStrategy.cpp | 5 +- core/src/db/meta/MySQLMetaImpl.cpp | 4 +- core/src/db/meta/SqliteMetaImpl.cpp | 3 +- 5 files changed, 66 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec4de955..f88204d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Please mark all change in change log and use the issue from GitHub # Milvus 0.9.1 (TBD) ## Bug +- \#2366 Reduce SQL execution times for collection contains lot of partitions - \#2378 Duplicate data after server restart - \#2399 The nlist set by the user may not take effect - \#2403 MySQL max_idle_time is 10 by default diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 8fdf9f1e..e4042f77 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -393,6 +393,7 @@ DBImpl::PreloadCollection(const std::string& collection_id) { // step 1: get all collection files from parent collection meta::FilesHolder files_holder; +#if 0 auto status = meta_ptr_->FilesToSearch(collection_id, files_holder); if (!status.ok()) { return status; @@ -404,6 +405,25 @@ DBImpl::PreloadCollection(const std::string& collection_id) { for (auto& schema : partition_array) { status = meta_ptr_->FilesToSearch(schema.collection_id_, files_holder); } +#else + auto status = meta_ptr_->FilesToSearch(collection_id, files_holder); + if (!status.ok()) { + return status; + } + + std::vector partition_array; + status = meta_ptr_->ShowPartitions(collection_id, partition_array); + + std::set partition_ids; + for (auto& schema : partition_array) { + partition_ids.insert(schema.collection_id_); + } + + status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder); + if (!status.ok()) { + return status; + } +#endif int64_t size = 0; int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); @@ -1642,6 +1662,7 @@ DBImpl::Query(const std::shared_ptr& context, const std::string Status status; meta::FilesHolder files_holder; if (partition_tags.empty()) { +#if 0 // no partition tag specified, means search in whole collection // get all collection files from parent collection status = meta_ptr_->FilesToSearch(collection_id, files_holder); @@ -1654,11 +1675,33 @@ DBImpl::Query(const std::shared_ptr& context, const std::string for (auto& schema : partition_array) { status = meta_ptr_->FilesToSearch(schema.collection_id_, files_holder); } +#else + // no partition tag specified, means search in whole collection + // get files from root collection + status = meta_ptr_->FilesToSearch(collection_id, files_holder); + if (!status.ok()) { + return status; + } + + // get files from partitions + std::set partition_ids; + std::vector partition_array; + status = meta_ptr_->ShowPartitions(collection_id, partition_array); + for (auto& id : partition_array) { + partition_ids.insert(id.collection_id_); + } + + status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder); + if (!status.ok()) { + return status; + } +#endif if (files_holder.HoldFiles().empty()) { return Status::OK(); // no files to search } } else { +#if 0 // get files from specified partitions std::set partition_name_array; status = GetPartitionsByTags(collection_id, partition_tags, partition_name_array); @@ -1669,7 +1712,20 @@ DBImpl::Query(const std::shared_ptr& context, const std::string for (auto& partition_name : partition_name_array) { status = meta_ptr_->FilesToSearch(partition_name, files_holder); } +#else + std::set partition_name_array; + status = GetPartitionsByTags(collection_id, partition_tags, partition_name_array); + if (!status.ok()) { + return status; // didn't match any partition. + } + std::set partition_ids; + for (auto& partition_name : partition_name_array) { + partition_ids.insert(partition_name); + } + + status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder); +#endif if (files_holder.HoldFiles().empty()) { return Status::OK(); // no files to search } diff --git a/core/src/db/merge/MergeAdaptiveStrategy.cpp b/core/src/db/merge/MergeAdaptiveStrategy.cpp index 1859310c..4ebcaeaa 100644 --- a/core/src/db/merge/MergeAdaptiveStrategy.cpp +++ b/core/src/db/merge/MergeAdaptiveStrategy.cpp @@ -28,17 +28,20 @@ struct { Status MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) { - meta::SegmentsSchema sort_files; + meta::SegmentsSchema sort_files, ignore_files; meta::SegmentsSchema& files = files_holder.HoldFiles(); for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) { meta::SegmentSchema& file = *iter; if (file.index_file_size_ > 0 && (int64_t)file.file_size_ > file.index_file_size_) { // file that no need to merge + ignore_files.push_back(file); continue; } sort_files.push_back(file); } + files_holder.UnmarkFiles(ignore_files); + // no need to merge single file if (sort_files.size() < 2) { return Status::OK(); diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index 40178cf8..e18c0ef7 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -1714,7 +1714,7 @@ MySQLMetaImpl::FilesToSearchEx(const std::string& root_collection, const std::se // distribute id array to batchs const int64_t batch_size = 50; std::vector> id_groups; - std::vector temp_group = {root_collection}; + std::vector temp_group; int64_t count = 1; for (auto& id : partition_id_array) { temp_group.push_back(id); @@ -1739,6 +1739,8 @@ MySQLMetaImpl::FilesToSearchEx(const std::string& root_collection, const std::se mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_); bool is_null_connection = (connectionPtr == nullptr); + fiu_do_on("MySQLMetaImpl.FilesToSearch.null_connection", is_null_connection = true); + fiu_do_on("MySQLMetaImpl.FilesToSearch.throw_exception", throw std::exception();); if (is_null_connection) { return Status(DB_ERROR, "Failed to connect to meta server(mysql)"); } diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index a3e9f273..b15602b7 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -1135,6 +1135,7 @@ SqliteMetaImpl::FilesToSearchEx(const std::string& root_collection, FilesHolder& files_holder) { try { server::MetricCollector metric; + fiu_do_on("SqliteMetaImpl.FilesToSearch.throw_exception", throw std::exception()); // get root collection information CollectionSchema collection_schema; @@ -1147,7 +1148,7 @@ SqliteMetaImpl::FilesToSearchEx(const std::string& root_collection, // distribute id array to batchs const int64_t batch_size = 50; std::vector> id_groups; - std::vector temp_group = {root_collection}; + std::vector temp_group; int64_t count = 1; for (auto& id : partition_id_array) { temp_group.push_back(id); -- GitLab