Utils.cpp 10.5 KB
Newer Older
1
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
J
jinhai 已提交
2
//
3 4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
J
jinhai 已提交
5
//
6 7 8 9 10
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
J
jinhai 已提交
11

S
starlord 已提交
12
#include "db/Utils.h"
X
Xu Peng 已提交
13

S
shengjh 已提交
14
#include <fiu-local.h>
15

S
starlord 已提交
16
#include <boost/filesystem.hpp>
X
Xu Peng 已提交
17
#include <chrono>
S
starlord 已提交
18
#include <mutex>
19
#include <regex>
S
starlord 已提交
20
#include <vector>
X
Xu Peng 已提交
21

22
#include "config/Config.h"
C
Cai Yudong 已提交
23
//#include "storage/s3/S3ClientWrapper.h"
24 25 26
#include "utils/CommonUtil.h"
#include "utils/Log.h"

27 28
#include <map>

J
jinhai 已提交
29
namespace milvus {
X
Xu Peng 已提交
30 31 32
namespace engine {
namespace utils {

S
starlord 已提交
33 34
namespace {

S
starlord 已提交
35
// Path fragment inserted between a storage root and a collection id when building collection folders.
const char* TABLES_FOLDER = "/tables/";
S
starlord 已提交
36

J
jinhai 已提交
37 38
// Counter advanced once per NEW_INDEX file when choosing its storage path (round robin).
// All reads and writes in this file happen while holding index_file_counter_mutex.
uint64_t index_file_counter = 0;
std::mutex index_file_counter_mutex;
39

C
Cai Yudong 已提交
40
static std::string
J
Jin Hai 已提交
41 42
ConstructParentFolder(const std::string& db_path, const meta::SegmentSchema& table_file) {
    std::string table_path = db_path + TABLES_FOLDER + table_file.collection_id_;
43
    std::string partition_path = table_path + "/" + table_file.segment_id_;
S
starlord 已提交
44 45 46
    return partition_path;
}

C
Cai Yudong 已提交
47
static std::string
G
groot 已提交
48
GetCollectionFileParentFolder(const DBMetaOptions& options, const meta::SegmentSchema& table_file) {
S
starlord 已提交
49 50
    uint64_t path_count = options.slave_paths_.size() + 1;
    std::string target_path = options.path_;
51 52
    uint64_t index = 0;

J
Jin Hai 已提交
53
    if (meta::SegmentSchema::NEW_INDEX == table_file.file_type_) {
54 55 56 57 58
        // index file is large file and to be persisted permanently
        // we need to distribute index files to each db_path averagely
        // round robin according to a file counter
        std::lock_guard<std::mutex> lock(index_file_counter_mutex);
        index = index_file_counter % path_count;
59
        ++index_file_counter;
60 61 62 63 64 65 66
    } else {
        // for other type files, they could be merged or deleted
        // so we round robin according to their file id
        index = table_file.id_ % path_count;
    }

    if (index > 0) {
S
starlord 已提交
67
        target_path = options.slave_paths_[index - 1];
S
starlord 已提交
68 69 70 71 72
    }

    return ConstructParentFolder(target_path, table_file);
}

S
starlord 已提交
73
}  // namespace
S
starlord 已提交
74

S
starlord 已提交
75 76
// Returns the system clock's current time as microseconds since the clock's epoch.
int64_t
GetMicroSecTimeStamp() {
    using std::chrono::microseconds;
    using std::chrono::system_clock;

    const auto since_epoch = system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<microseconds>(since_epoch).count();
}

S
starlord 已提交
83
// Creates the folder of collection_id under options.path_ and then under every
// entry of options.slave_paths_. Stops at, logs and returns the first failing status;
// returns Status::OK() when every directory was created.
Status
CreateCollectionPath(const DBMetaOptions& options, const std::string& collection_id) {
    const std::string sub_folder = TABLES_FOLDER + collection_id;

    std::string folder = options.path_ + sub_folder;
    auto result = server::CommonUtil::CreateDirectory(folder);
    if (!result.ok()) {
        LOG_ENGINE_ERROR_ << result.message();
        return result;
    }

    for (const auto& slave_root : options.slave_paths_) {
        folder = slave_root + sub_folder;
        result = server::CommonUtil::CreateDirectory(folder);
        // fault-injection hook: lets tests force a failure on a slave path
        fiu_do_on("CreateCollectionPath.creat_slave_path", result = Status(DB_INVALID_PATH, ""));
        if (!result.ok()) {
            LOG_ENGINE_ERROR_ << result.message();
            return result;
        }
    }

    return Status::OK();
}

S
starlord 已提交
106
// Removes the folder of collection_id from every slave path and from options.path_.
// With force == true the folder is removed recursively without any check;
// otherwise it is removed only when it exists and is empty.
// Always returns Status::OK().
Status
DeleteCollectionPath(const DBMetaOptions& options, const std::string& collection_id, bool force) {
    namespace fs = boost::filesystem;

    std::vector<std::string> roots(options.slave_paths_.begin(), options.slave_paths_.end());
    roots.push_back(options.path_);

    for (const auto& root : roots) {
        const std::string folder = root + TABLES_FOLDER + collection_id;

        // short-circuit keeps the existence/emptiness probes off the forced path
        const bool removable = force || (fs::exists(folder) && fs::is_empty(folder));
        if (!removable) {
            continue;
        }

        fs::remove_all(folder);
        LOG_ENGINE_DEBUG_ << "Remove collection folder: " << folder;
    }

    // bool s3_enable = false;
    // server::Config& config = server::Config::GetInstance();
    // config.GetStorageConfigS3Enable(s3_enable);

    // if (s3_enable) {
    //     std::string table_path = options.path_ + TABLES_FOLDER + collection_id;

    //     auto& storage_inst = milvus::storage::S3ClientWrapper::GetInstance();
    //     Status stat = storage_inst.DeleteObjects(table_path);
    //     if (!stat.ok()) {
    //         return stat;
    //     }
    // }

    return Status::OK();
}

S
starlord 已提交
139
// Chooses a parent folder for table_file, creates it, and stores
// "<folder>/<file_id_>" in table_file.location_.
// On a directory-creation failure the status is logged and returned, and
// location_ is left untouched.
Status
CreateCollectionFilePath(const DBMetaOptions& options, meta::SegmentSchema& table_file) {
    const std::string folder = GetCollectionFileParentFolder(options, table_file);

    auto result = server::CommonUtil::CreateDirectory(folder);
    // fault-injection hook: lets tests force a creation failure
    fiu_do_on("CreateCollectionFilePath.fail_create", result = Status(DB_INVALID_PATH, ""));
    if (result.ok()) {
        table_file.location_ = folder + "/" + table_file.file_id_;
        return Status::OK();
    }

    LOG_ENGINE_ERROR_ << result.message();
    return result;
}

S
starlord 已提交
155
// Resolves table_file.location_ to "<segment folder>/<file_id_>".
//
// When S3 storage is enabled the location under options.path_ is used without
// touching the local file system. Otherwise options.path_ and then each slave
// path are probed in order, and the first root whose segment folder exists wins.
// If none exists, a DB_ERROR status is returned; the error is only logged when
// the file is recorded as non-empty.
Status
GetCollectionFilePath(const DBMetaOptions& options, meta::SegmentSchema& table_file) {
    std::string folder = ConstructParentFolder(options.path_, table_file);
    std::string candidate = folder + "/" + table_file.file_id_;

    bool use_s3 = false;
    server::Config::GetInstance().GetStorageConfigS3Enable(use_s3);
    // fault-injection hook: lets tests pretend S3 is enabled
    fiu_do_on("GetCollectionFilePath.enable_s3", use_s3 = true);

    /* with S3 there is no need to check file existence */
    if (use_s3 || boost::filesystem::exists(folder)) {
        table_file.location_ = candidate;
        return Status::OK();
    }

    for (const auto& slave_root : options.slave_paths_) {
        folder = ConstructParentFolder(slave_root, table_file);
        candidate = folder + "/" + table_file.file_id_;
        if (boost::filesystem::exists(folder)) {
            table_file.location_ = candidate;
            return Status::OK();
        }
    }

    const std::string msg = "Collection file doesn't exist: " + candidate;
    if (table_file.file_size_ > 0) {  // no need to pop error for empty file
        LOG_ENGINE_ERROR_ << msg << " in path: " << options.path_ << " for collection: " << table_file.collection_id_;
    }

    return Status(DB_ERROR, msg);
}

S
starlord 已提交
192
// Resolves the on-disk location of table_file and removes that single file.
// Always returns Status::OK().
// NOTE(review): the status of the location lookup is not checked, so a failed
// lookup still leads to a remove() on whatever location_ holds — confirm intended.
Status
DeleteCollectionFilePath(const DBMetaOptions& options, meta::SegmentSchema& table_file) {
    utils::GetCollectionFilePath(options, table_file);

    const auto& target = table_file.location_;
    boost::filesystem::remove(target);

    return Status::OK();
}

199
// Resolves the on-disk location of table_file and recursively removes the
// folder that contains it (the segment folder). Always returns Status::OK().
// NOTE(review): the status of the location lookup is not checked — confirm intended.
Status
DeleteSegment(const DBMetaOptions& options, meta::SegmentSchema& table_file) {
    utils::GetCollectionFilePath(options, table_file);

    // parent folder of the resolved file location
    const boost::filesystem::path file_location(table_file.location_);
    const std::string segment_folder = file_location.parent_path().string();

    boost::filesystem::remove_all(segment_folder);
    return Status::OK();
}

// Writes the parent directory of `path` (as computed by boost::filesystem) into
// `parent_path`. Always returns Status::OK().
Status
GetParentPath(const std::string& path, std::string& parent_path) {
    parent_path = boost::filesystem::path(path).parent_path().string();
    return Status::OK();
}

S
starlord 已提交
215
bool
216
IsSameIndex(const CollectionIndex& index1, const CollectionIndex& index2) {
217
    return index1.engine_type_ == index2.engine_type_ && index1.extra_params_ == index2.extra_params_ &&
S
starlord 已提交
218
           index1.metric_type_ == index2.metric_type_;
219 220
}

221 222 223 224 225
// True when `type` is the numeric value of EngineType::FAISS_IDMAP or
// EngineType::FAISS_BIN_IDMAP.
bool
IsRawIndexType(int32_t type) {
    const auto idmap = static_cast<int32_t>(EngineType::FAISS_IDMAP);
    const auto bin_idmap = static_cast<int32_t>(EngineType::FAISS_BIN_IDMAP);
    return type == idmap || type == bin_idmap;
}

226 227 228 229
// True when `metric_type` is the numeric value of one of the metric types
// listed below (HAMMING, JACCARD, SUBSTRUCTURE, SUPERSTRUCTURE, TANIMOTO).
bool
IsBinaryMetricType(int32_t metric_type) {
    const int32_t binary_metrics[] = {
        static_cast<int32_t>(engine::MetricType::HAMMING),
        static_cast<int32_t>(engine::MetricType::JACCARD),
        static_cast<int32_t>(engine::MetricType::SUBSTRUCTURE),
        static_cast<int32_t>(engine::MetricType::SUPERSTRUCTURE),
        static_cast<int32_t>(engine::MetricType::TANIMOTO),
    };

    for (const int32_t candidate : binary_metrics) {
        if (metric_type == candidate) {
            return true;
        }
    }
    return false;
}

S
starlord 已提交
235
// Encodes the local calendar date of `t`, shifted by `day_delta` days, as
//     tm_year * 10000 + tm_mon * 100 + tm_mday
// using the raw struct tm fields (tm_year counts from 1900, tm_mon is 0-based).
//
// @param t          point in time to convert (interpreted in the local time zone)
// @param day_delta  number of days to add (positive) or subtract (negative)
// @return           the encoded date
meta::DateT
GetDate(const std::time_t& t, int day_delta) {
    // zero-initialised so the result is deterministic even if localtime_r fails
    struct tm ltm = {};
    localtime_r(&t, &ltm);

    if (day_delta != 0) {
        ltm.tm_mday += day_delta;
        // Only the calendar date matters here. Pin the time to noon and let
        // mktime work out DST for the target day; otherwise a DST flag carried
        // over from the source day can shift the time by an hour and, close to
        // midnight, roll the result onto the neighbouring day.
        ltm.tm_hour = 12;
        ltm.tm_isdst = -1;
        mktime(&ltm);  // normalises an out-of-range tm_mday into month/year
    }

    return ltm.tm_year * 10000 + ltm.tm_mon * 100 + ltm.tm_mday;
}

S
starlord 已提交
257 258
// Encoded date of "now" shifted by day_delta days; see GetDate(t, day_delta).
meta::DateT
GetDateWithDelta(int day_delta) {
    const std::time_t now = std::time(nullptr);
    return GetDate(now, day_delta);
}

S
starlord 已提交
262 263
// Encoded date of "now" with no day offset; see GetDate(t, day_delta).
meta::DateT
GetDate() {
    const std::time_t now = std::time(nullptr);
    return GetDate(now, 0);
}

267
// URI format: dialect://username:password@host:port/database
S
starlord 已提交
268
Status
S
starlord 已提交
269
ParseMetaUri(const std::string& uri, MetaUriInfo& info) {
270 271 272 273 274 275
    std::string dialect_regex = "(.*)";
    std::string username_tegex = "(.*)";
    std::string password_regex = "(.*)";
    std::string host_regex = "(.*)";
    std::string port_regex = "(.*)";
    std::string db_name_regex = "(.*)";
S
starlord 已提交
276 277
    std::string uri_regex_str = dialect_regex + "\\:\\/\\/" + username_tegex + "\\:" + password_regex + "\\@" +
                                host_regex + "\\:" + port_regex + "\\/" + db_name_regex;
278 279 280 281 282 283 284 285 286 287 288 289

    std::regex uri_regex(uri_regex_str);
    std::smatch pieces_match;

    if (std::regex_match(uri, pieces_match, uri_regex)) {
        info.dialect_ = pieces_match[1].str();
        info.username_ = pieces_match[2].str();
        info.password_ = pieces_match[3].str();
        info.host_ = pieces_match[4].str();
        info.port_ = pieces_match[5].str();
        info.db_name_ = pieces_match[6].str();

S
starlord 已提交
290
        // TODO(myh): verify host, port...
291 292 293 294 295 296 297
    } else {
        return Status(DB_INVALID_META_URI, "Invalid meta uri: " + uri);
    }

    return Status::OK();
}

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
std::string
GetIndexName(int32_t index_type) {
    static std::map<int32_t, std::string> index_type_name = {
        {(int32_t)engine::EngineType::FAISS_IDMAP, "IDMAP"},
        {(int32_t)engine::EngineType::FAISS_IVFFLAT, "IVFFLAT"},
        {(int32_t)engine::EngineType::FAISS_IVFSQ8, "IVFSQ8"},
        {(int32_t)engine::EngineType::NSG_MIX, "NSG"},
        {(int32_t)engine::EngineType::ANNOY, "ANNOY"},
        {(int32_t)engine::EngineType::FAISS_IVFSQ8H, "IVFSQ8H"},
        {(int32_t)engine::EngineType::FAISS_PQ, "PQ"},
        {(int32_t)engine::EngineType::SPTAG_KDT, "KDT"},
        {(int32_t)engine::EngineType::SPTAG_BKT, "BKT"},
        {(int32_t)engine::EngineType::FAISS_BIN_IDMAP, "IDMAP"},
        {(int32_t)engine::EngineType::FAISS_BIN_IVFFLAT, "IVFFLAT"},
    };

    if (index_type_name.find(index_type) == index_type_name.end()) {
        return "Unknow";
    }

    return index_type_name[index_type];
}

S
starlord 已提交
321 322 323
}  // namespace utils
}  // namespace engine
}  // namespace milvus