VectorCompressFormat.cpp 4.0 KB
Newer Older
C
Cai Yudong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include <boost/filesystem.hpp>
#include <memory>
20
#include <unordered_map>
C
Cai Yudong 已提交
21

G
groot 已提交
22
#include "codecs/VectorCompressFormat.h"
G
groot 已提交
23
#include "db/Utils.h"
C
Cai Yudong 已提交
24
#include "knowhere/common/BinarySet.h"
25
#include "storage/ExtraFileInfo.h"
C
Cai Yudong 已提交
26 27 28 29 30 31 32
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"

namespace milvus {
namespace codec {

G
groot 已提交
33 34 35
// Suffix appended to a caller-supplied path to form the full path of a vector compress file.
const char* VECTOR_COMPRESS_POSTFIX = ".cmp";

std::string
G
groot 已提交
36
VectorCompressFormat::FilePostfix() {
G
groot 已提交
37 38 39 40
    std::string str = VECTOR_COMPRESS_POSTFIX;
    return str;
}

G
groot 已提交
41
// Loads the payload of a vector compress file into `compress`.
//
// fs_ptr    - file-system handler; its reader_ptr_ is used to open and read the file.
// file_path - path without suffix; VECTOR_COMPRESS_POSTFIX is appended to form the real path.
// compress  - output; replaced with a newly allocated Binary holding the payload bytes.
//
// Returns Status::OK() on success, SERVER_CANNOT_OPEN_FILE if the file cannot be opened, or
// SERVER_UNEXPECTED_ERROR if the file holds no payload. CHECK_MAGIC_VALID / CHECK_SUM_VALID are
// macros defined elsewhere; presumably they return early with an error Status when validation
// fails (confirm against storage/ExtraFileInfo.h).
Status
VectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                           knowhere::BinaryPtr& compress) {
    milvus::TimeRecorder recorder("VectorCompressFormat::Read");

    const std::string full_file_path = file_path + VECTOR_COMPRESS_POSTFIX;
    CHECK_MAGIC_VALID(fs_ptr, full_file_path);
    CHECK_SUM_VALID(fs_ptr, full_file_path);
    if (!fs_ptr->reader_ptr_->Open(full_file_path)) {
        return Status(SERVER_CANNOT_OPEN_FILE, "Fail to open vector compress file: " + full_file_path);
    }

    // The payload is whatever remains after removing the magic, header and checksum sections.
    int64_t length = fs_ptr->reader_ptr_->Length() - MAGIC_SIZE - HEADER_SIZE - SUM_SIZE;
    if (length <= 0) {
        // Release the reader before bailing out; the success path below closes it as well.
        fs_ptr->reader_ptr_->Close();
        return Status(SERVER_UNEXPECTED_ERROR, "Invalid vector compress length: " + full_file_path);
    }

    compress = std::make_shared<knowhere::Binary>();
    compress->data = std::shared_ptr<uint8_t[]>(new uint8_t[length]);
    compress->size = length;

    // Skip the magic and header sections, then read exactly the payload bytes.
    fs_ptr->reader_ptr_->Seekg(MAGIC_SIZE + HEADER_SIZE);
    fs_ptr->reader_ptr_->Read(compress->data.get(), length);
    fs_ptr->reader_ptr_->Close();

    // Throughput is logged for diagnostics only. The 1000000.0 factor suggests `span` is in
    // microseconds -- TODO confirm against TimeRecorder::RecordSection.
    double span = recorder.RecordSection("End");
    double rate = length * 1000000.0 / span / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "VectorCompressFormat::Read(" << full_file_path << ") rate " << rate << "MB/s";

    return Status::OK();
}

G
groot 已提交
73
// Writes `compress` to a vector compress file, followed by a checksum.
//
// fs_ptr    - file-system handler; its writer_ptr_ is used to open and write the file.
// file_path - path without suffix; VECTOR_COMPRESS_POSTFIX is appended to form the real path.
// compress  - binary payload to store; must not be null.
//
// Returns Status::OK() on success, SERVER_UNEXPECTED_ERROR if `compress` is null,
// SERVER_CANNOT_OPEN_FILE if the file cannot be opened for the payload write, or
// SERVER_WRITE_ERROR if an exception is thrown while writing (an exit signal is also sent in
// that case). WRITE_MAGIC / WRITE_HEADER / WRITE_SUM are macros defined elsewhere; presumably
// they write the corresponding file sections and may return early on failure (confirm against
// storage/ExtraFileInfo.h).
Status
VectorCompressFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                            const knowhere::BinaryPtr& compress) {
    milvus::TimeRecorder recorder("VectorCompressFormat::Write");

    const std::string full_file_path = file_path + VECTOR_COMPRESS_POSTFIX;

    // Reject a null payload before touching the disk, so no file is left with only magic/header.
    if (compress == nullptr) {
        return Status(SERVER_UNEXPECTED_ERROR, "Null vector compress data: " + full_file_path);
    }

    // TODO: add extra info
    std::unordered_map<std::string, std::string> maps;
    WRITE_MAGIC(fs_ptr, full_file_path);
    WRITE_HEADER(fs_ptr, full_file_path, maps);
    if (!fs_ptr->writer_ptr_->InOpen(full_file_path)) {
        return Status(SERVER_CANNOT_OPEN_FILE, "Fail to open vector compress: " + full_file_path);
    }

    try {
        // The payload is placed right after the magic and header sections.
        fs_ptr->writer_ptr_->Seekp(MAGIC_SIZE + HEADER_SIZE);
        fs_ptr->writer_ptr_->Write(compress->data.get(), compress->size);
        fs_ptr->writer_ptr_->Close();
        // The checksum is written only after the payload write has been closed.
        WRITE_SUM(fs_ptr, full_file_path);

        // Throughput is logged for diagnostics only. The 1000000.0 factor suggests `span` is in
        // microseconds -- TODO confirm against TimeRecorder::RecordSection.
        double span = recorder.RecordSection("End");
        double rate = compress->size * 1000000.0 / span / 1024 / 1024;
        LOG_ENGINE_DEBUG_ << "VectorCompressFormat::Write(" << full_file_path << ") rate " << rate << "MB/s";
    } catch (const std::exception& ex) {
        std::string err_msg = "Failed to write compress data: " + std::string(ex.what());
        LOG_ENGINE_ERROR_ << err_msg;

        // A failed write is treated as fatal: request shutdown, then report the error.
        engine::utils::SendExitSignal();
        return Status(SERVER_WRITE_ERROR, err_msg);
    }

    return Status::OK();
}

}  // namespace codec
}  // namespace milvus