未验证 提交 8b3b15c9 编写于 作者: G groot 提交者: GitHub

refine code (#3022)

* clean code first stage
Signed-off-by: Ngroot <yihua.mo@zilliz.com>

* rename some files
Signed-off-by: Ngroot <yihua.mo@zilliz.com>

* rename
Signed-off-by: Ngroot <yihua.mo@zilliz.com>

* typo
Signed-off-by: Ngroot <yihua.mo@zilliz.com>
上级 b565b33f
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "segment/Attrs.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Abstract codec interface for a segment's attribute data.
//
// Every operation receives the storage::FSHandler to perform I/O through. The exact on-disk layout is
// defined by the concrete implementation, not by this interface.
class AttrsFormat {
 public:
    // Virtual so that an implementation held through an AttrsFormat pointer/reference is destroyed correctly.
    virtual ~AttrsFormat() = default;

    // Reads attribute data through fs_ptr and returns it in attrs_read.
    virtual void
    read(const storage::FSHandlerPtr& fs_ptr, segment::AttrsPtr& attrs_read) = 0;

    // Writes the attribute data in attr through fs_ptr.
    virtual void
    write(const storage::FSHandlerPtr& fs_ptr, const segment::AttrsPtr& attr) = 0;

    // Reads the entity ids through fs_ptr into uids.
    virtual void
    read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<int64_t>& uids) = 0;

    // Reads raw bytes of the attribute named field_name into raw_attrs.
    // offset and num_bytes select the slice to read; how they are interpreted relative to the
    // stored data is up to the implementation.
    virtual void
    read_attrs(const storage::FSHandlerPtr& fs_ptr, const std::string& field_name, off_t offset, size_t num_bytes,
               std::vector<uint8_t>& raw_attrs) = 0;
};

using AttrsFormatPtr = std::shared_ptr<AttrsFormat>;
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "segment/AttrsIndex.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Abstract codec interface for a segment's attribute index.
//
// Both operations receive the storage::FSHandler to perform I/O through. The on-disk layout is
// defined by the concrete implementation, not by this interface.
class AttrsIndexFormat {
 public:
    // Virtual so that an implementation held through an AttrsIndexFormat pointer/reference is destroyed correctly.
    virtual ~AttrsIndexFormat() = default;

    // Reads the attribute index through fs_ptr and returns it in attr_index.
    virtual void
    read(const storage::FSHandlerPtr& fs_ptr, segment::AttrsIndexPtr& attr_index) = 0;

    // Writes the attribute index in attr_index through fs_ptr.
    virtual void
    write(const storage::FSHandlerPtr& fs_ptr, const segment::AttrsIndexPtr& attr_index) = 0;
};

using AttrsIndexFormatPtr = std::shared_ptr<AttrsIndexFormat>;
} // namespace codec
} // namespace milvus
......@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
#include "codecs/snapshot/SSBlockFormat.h"
#include "codecs/BlockFormat.h"
#include <fcntl.h>
#include <unistd.h>
......@@ -32,7 +32,7 @@ namespace milvus {
namespace codec {
void
SSBlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, std::vector<uint8_t>& raw) {
BlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, std::vector<uint8_t>& raw) {
if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
......@@ -49,8 +49,8 @@ SSBlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file
}
void
SSBlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, int64_t offset,
int64_t num_bytes, std::vector<uint8_t>& raw) {
BlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, int64_t offset, int64_t num_bytes,
std::vector<uint8_t>& raw) {
if (offset < 0 || num_bytes <= 0) {
std::string err_msg = "Invalid input to read: " + file_path;
LOG_ENGINE_ERROR_ << err_msg;
......@@ -80,8 +80,8 @@ SSBlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file
}
void
SSBlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const ReadRanges& read_ranges,
std::vector<uint8_t>& raw) {
BlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const ReadRanges& read_ranges,
std::vector<uint8_t>& raw) {
if (read_ranges.empty()) {
return;
}
......@@ -121,8 +121,7 @@ SSBlockFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file
}
void
SSBlockFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const std::vector<uint8_t>& raw) {
BlockFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const std::vector<uint8_t>& raw) {
if (!fs_ptr->writer_ptr_->open(file_path.c_str())) {
std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
LOG_ENGINE_ERROR_ << err_msg;
......
......@@ -36,9 +36,9 @@ struct ReadRange {
using ReadRanges = std::vector<ReadRange>;
class SSBlockFormat {
class BlockFormat {
public:
SSBlockFormat() = default;
BlockFormat() = default;
void
Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, std::vector<uint8_t>& raw);
......@@ -55,16 +55,16 @@ class SSBlockFormat {
Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const std::vector<uint8_t>& raw);
// No copy and move
SSBlockFormat(const SSBlockFormat&) = delete;
SSBlockFormat(SSBlockFormat&&) = delete;
BlockFormat(const BlockFormat&) = delete;
BlockFormat(BlockFormat&&) = delete;
SSBlockFormat&
operator=(const SSBlockFormat&) = delete;
SSBlockFormat&
operator=(SSBlockFormat&&) = delete;
BlockFormat&
operator=(const BlockFormat&) = delete;
BlockFormat&
operator=(BlockFormat&&) = delete;
};
using SSBlockFormatPtr = std::shared_ptr<SSBlockFormat>;
using BlockFormatPtr = std::shared_ptr<BlockFormat>;
} // namespace codec
} // namespace milvus
......@@ -15,60 +15,60 @@
// specific language governing permissions and limitations
// under the License.
#include "codecs/snapshot/SSCodec.h"
#include "codecs/Codec.h"
#include <memory>
#include "SSDeletedDocsFormat.h"
#include "SSIdBloomFilterFormat.h"
#include "SSStructuredIndexFormat.h"
#include "SSVectorIndexFormat.h"
#include "DeletedDocsFormat.h"
#include "IdBloomFilterFormat.h"
#include "StructuredIndexFormat.h"
#include "VectorIndexFormat.h"
namespace milvus {
namespace codec {
SSCodec&
SSCodec::instance() {
static SSCodec s_instance;
Codec&
Codec::instance() {
static Codec s_instance;
return s_instance;
}
SSCodec::SSCodec() {
block_format_ptr_ = std::make_shared<SSBlockFormat>();
structured_index_format_ptr_ = std::make_shared<SSStructuredIndexFormat>();
vector_index_format_ptr_ = std::make_shared<SSVectorIndexFormat>();
deleted_docs_format_ptr_ = std::make_shared<SSDeletedDocsFormat>();
id_bloom_filter_format_ptr_ = std::make_shared<SSIdBloomFilterFormat>();
vector_compress_format_ptr_ = std::make_shared<SSVectorCompressFormat>();
Codec::Codec() {
block_format_ptr_ = std::make_shared<BlockFormat>();
structured_index_format_ptr_ = std::make_shared<StructuredIndexFormat>();
vector_index_format_ptr_ = std::make_shared<VectorIndexFormat>();
deleted_docs_format_ptr_ = std::make_shared<DeletedDocsFormat>();
id_bloom_filter_format_ptr_ = std::make_shared<IdBloomFilterFormat>();
vector_compress_format_ptr_ = std::make_shared<VectorCompressFormat>();
}
SSBlockFormatPtr
SSCodec::GetBlockFormat() {
BlockFormatPtr
Codec::GetBlockFormat() {
return block_format_ptr_;
}
SSVectorIndexFormatPtr
SSCodec::GetVectorIndexFormat() {
VectorIndexFormatPtr
Codec::GetVectorIndexFormat() {
return vector_index_format_ptr_;
}
SSStructuredIndexFormatPtr
SSCodec::GetStructuredIndexFormat() {
StructuredIndexFormatPtr
Codec::GetStructuredIndexFormat() {
return structured_index_format_ptr_;
}
SSDeletedDocsFormatPtr
SSCodec::GetDeletedDocsFormat() {
DeletedDocsFormatPtr
Codec::GetDeletedDocsFormat() {
return deleted_docs_format_ptr_;
}
SSIdBloomFilterFormatPtr
SSCodec::GetIdBloomFilterFormat() {
IdBloomFilterFormatPtr
Codec::GetIdBloomFilterFormat() {
return id_bloom_filter_format_ptr_;
}
SSVectorCompressFormatPtr
SSCodec::GetVectorCompressFormat() {
VectorCompressFormatPtr
Codec::GetVectorCompressFormat() {
return vector_compress_format_ptr_;
}
} // namespace codec
......
......@@ -17,55 +17,49 @@
#pragma once
#include "AttrsFormat.h"
#include "AttrsIndexFormat.h"
#include "DeletedDocsFormat.h"
#include "IdBloomFilterFormat.h"
#include "IdIndexFormat.h"
#include "VectorCompressFormat.h"
#include "VectorIndexFormat.h"
#include "VectorsFormat.h"
#include "utils/Exception.h"
#include "codecs/BlockFormat.h"
#include "codecs/DeletedDocsFormat.h"
#include "codecs/IdBloomFilterFormat.h"
#include "codecs/StructuredIndexFormat.h"
#include "codecs/VectorCompressFormat.h"
#include "codecs/VectorIndexFormat.h"
namespace milvus {
namespace codec {
class Codec {
public:
virtual VectorsFormatPtr
GetVectorsFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "vectors not supported");
}
static Codec&
instance();
virtual AttrsFormatPtr
GetAttrsFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "attr not supported");
}
BlockFormatPtr
GetBlockFormat();
virtual VectorIndexFormatPtr
GetVectorIndexFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "vectors index not supported");
}
VectorIndexFormatPtr
GetVectorIndexFormat();
virtual AttrsIndexFormatPtr
GetAttrsIndexFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "attr index not supported");
}
StructuredIndexFormatPtr
GetStructuredIndexFormat();
virtual DeletedDocsFormatPtr
GetDeletedDocsFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "delete doc index not supported");
}
DeletedDocsFormatPtr
GetDeletedDocsFormat();
virtual IdBloomFilterFormatPtr
GetIdBloomFilterFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "id bloom filter not supported");
}
IdBloomFilterFormatPtr
GetIdBloomFilterFormat();
virtual VectorCompressFormatPtr
GetVectorCompressFormat() {
throw Exception(SERVER_UNSUPPORTED_ERROR, "vector compress not supported");
}
VectorCompressFormatPtr
GetVectorCompressFormat();
private:
Codec();
private:
BlockFormatPtr block_format_ptr_;
StructuredIndexFormatPtr structured_index_format_ptr_;
VectorIndexFormatPtr vector_index_format_ptr_;
DeletedDocsFormatPtr deleted_docs_format_ptr_;
IdBloomFilterFormatPtr id_bloom_filter_format_ptr_;
VectorCompressFormatPtr vector_compress_format_ptr_;
};
} // namespace codec
......
......@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
#include "codecs/snapshot/SSDeletedDocsFormat.h"
#include "codecs/DeletedDocsFormat.h"
#include <fcntl.h>
#include <unistd.h>
......@@ -30,7 +30,6 @@
#include <string>
#include <vector>
#include "segment/Types.h"
#include "utils/Exception.h"
#include "utils/Log.h"
......@@ -40,14 +39,14 @@ namespace codec {
const char* DELETED_DOCS_POSTFIX = ".del";
std::string
SSDeletedDocsFormat::FilePostfix() {
DeletedDocsFormat::FilePostfix() {
std::string str = DELETED_DOCS_POSTFIX;
return str;
}
void
SSDeletedDocsFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::DeletedDocsPtr& deleted_docs) {
DeletedDocsFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::DeletedDocsPtr& deleted_docs) {
const std::string full_file_path = file_path + DELETED_DOCS_POSTFIX;
int del_fd = open(full_file_path.c_str(), O_RDONLY, 00664);
......@@ -84,8 +83,8 @@ SSDeletedDocsFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string
}
void
SSDeletedDocsFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::DeletedDocsPtr& deleted_docs) {
DeletedDocsFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::DeletedDocsPtr& deleted_docs) {
const std::string full_file_path = file_path + DELETED_DOCS_POSTFIX;
// Create a temporary file from the existing file
......@@ -154,7 +153,7 @@ SSDeletedDocsFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::strin
}
void
SSDeletedDocsFormat::ReadSize(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, size_t& size) {
DeletedDocsFormat::ReadSize(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, size_t& size) {
const std::string full_file_path = file_path + DELETED_DOCS_POSTFIX;
int del_fd = open(full_file_path.c_str(), O_RDONLY, 00664);
if (del_fd == -1) {
......
......@@ -18,6 +18,7 @@
#pragma once
#include <memory>
#include <string>
#include "segment/DeletedDocs.h"
#include "storage/FSHandler.h"
......@@ -27,14 +28,29 @@ namespace codec {
class DeletedDocsFormat {
public:
virtual void
read(const storage::FSHandlerPtr& fs_ptr, segment::DeletedDocsPtr& deleted_docs) = 0;
DeletedDocsFormat() = default;
virtual void
write(const storage::FSHandlerPtr& fs_ptr, const segment::DeletedDocsPtr& deleted_docs) = 0;
std::string
FilePostfix();
virtual void
readSize(const storage::FSHandlerPtr& fs_ptr, size_t& size) = 0;
void
Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, segment::DeletedDocsPtr& deleted_docs);
void
Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::DeletedDocsPtr& deleted_docs);
void
ReadSize(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, size_t& size);
// No copy and move
DeletedDocsFormat(const DeletedDocsFormat&) = delete;
DeletedDocsFormat(DeletedDocsFormat&&) = delete;
DeletedDocsFormat&
operator=(const DeletedDocsFormat&) = delete;
DeletedDocsFormat&
operator=(DeletedDocsFormat&&) = delete;
};
using DeletedDocsFormatPtr = std::shared_ptr<DeletedDocsFormat>;
......
......@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
#include "codecs/snapshot/SSIdBloomFilterFormat.h"
#include "codecs/IdBloomFilterFormat.h"
#include <fiu-local.h>
#include <memory>
......@@ -33,14 +33,14 @@ constexpr unsigned int BLOOM_FILTER_CAPACITY = 500000;
constexpr double BLOOM_FILTER_ERROR_RATE = 0.01;
std::string
SSIdBloomFilterFormat::FilePostfix() {
IdBloomFilterFormat::FilePostfix() {
std::string str = BLOOM_FILTER_POSTFIX;
return str;
}
void
SSIdBloomFilterFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
IdBloomFilterFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
const std::string full_file_path = file_path + BLOOM_FILTER_POSTFIX;
scaling_bloom_t* bloom_filter =
new_scaling_bloom_from_file(BLOOM_FILTER_CAPACITY, BLOOM_FILTER_ERROR_RATE, full_file_path.c_str());
......@@ -54,8 +54,8 @@ SSIdBloomFilterFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::stri
}
void
SSIdBloomFilterFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
IdBloomFilterFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
const std::string full_file_path = file_path + BLOOM_FILTER_POSTFIX;
if (scaling_bloom_flush(id_bloom_filter_ptr->GetBloomFilter()) == -1) {
std::string err_msg = "Failed to write bloom filter to file: " + full_file_path + ". " + std::strerror(errno);
......@@ -65,8 +65,8 @@ SSIdBloomFilterFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::str
}
void
SSIdBloomFilterFormat::Create(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
IdBloomFilterFormat::Create(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
const std::string full_file_path = file_path + BLOOM_FILTER_POSTFIX;
scaling_bloom_t* bloom_filter =
new_scaling_bloom(BLOOM_FILTER_CAPACITY, BLOOM_FILTER_ERROR_RATE, full_file_path.c_str());
......
......@@ -18,6 +18,7 @@
#pragma once
#include <memory>
#include <string>
#include "segment/IdBloomFilter.h"
#include "storage/FSHandler.h"
......@@ -27,14 +28,31 @@ namespace codec {
class IdBloomFilterFormat {
public:
virtual void
read(const storage::FSHandlerPtr& fs_ptr, segment::IdBloomFilterPtr& id_bloom_filter_ptr) = 0;
IdBloomFilterFormat() = default;
virtual void
write(const storage::FSHandlerPtr& fs_ptr, const segment::IdBloomFilterPtr& id_bloom_filter_ptr) = 0;
std::string
FilePostfix();
virtual void
create(const storage::FSHandlerPtr& fs_ptr, segment::IdBloomFilterPtr& id_bloom_filter_ptr) = 0;
void
Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::IdBloomFilterPtr& id_bloom_filter_ptr);
void
Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::IdBloomFilterPtr& id_bloom_filter_ptr);
void
Create(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
segment::IdBloomFilterPtr& id_bloom_filter_ptr);
// No copy and move
IdBloomFilterFormat(const IdBloomFilterFormat&) = delete;
IdBloomFilterFormat(IdBloomFilterFormat&&) = delete;
IdBloomFilterFormat&
operator=(const IdBloomFilterFormat&) = delete;
IdBloomFilterFormat&
operator=(IdBloomFilterFormat&&) = delete;
};
using IdBloomFilterFormatPtr = std::shared_ptr<IdBloomFilterFormat>;
......
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace milvus {
namespace codec {
// Placeholder for an id-index codec. The class currently declares no members.
// The commented-out sketch below outlines a read()/write() interface; it is not compiled.
class IdIndexFormat {
// public:
// virtual IdIndex
// read() = 0;
//
// virtual void
// write(IdIndex id_index) = 0;
};
} // namespace codec
} // namespace milvus
......@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
#include "codecs/snapshot/SSStructuredIndexFormat.h"
#include "codecs/StructuredIndexFormat.h"
#include <fcntl.h>
#include <unistd.h>
......@@ -37,13 +37,13 @@ namespace codec {
const char* STRUCTURED_INDEX_POSTFIX = ".ind";
std::string
SSStructuredIndexFormat::FilePostfix() {
StructuredIndexFormat::FilePostfix() {
std::string str = STRUCTURED_INDEX_POSTFIX;
return str;
}
knowhere::IndexPtr
SSStructuredIndexFormat::CreateStructuredIndex(const milvus::engine::meta::hybrid::DataType data_type) {
StructuredIndexFormat::CreateStructuredIndex(const milvus::engine::meta::hybrid::DataType data_type) {
knowhere::IndexPtr index = nullptr;
switch (data_type) {
case engine::meta::hybrid::DataType::INT8: {
......@@ -79,8 +79,8 @@ SSStructuredIndexFormat::CreateStructuredIndex(const milvus::engine::meta::hybri
}
void
SSStructuredIndexFormat::Read(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::IndexPtr& index) {
StructuredIndexFormat::Read(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::IndexPtr& index) {
milvus::TimeRecorder recorder("SSStructuredIndexFormat::Read");
knowhere::BinarySet load_data_list;
......@@ -141,8 +141,8 @@ SSStructuredIndexFormat::Read(const milvus::storage::FSHandlerPtr& fs_ptr, const
}
void
SSStructuredIndexFormat::Write(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
engine::meta::hybrid::DataType data_type, const knowhere::IndexPtr& index) {
StructuredIndexFormat::Write(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
engine::meta::hybrid::DataType data_type, const knowhere::IndexPtr& index) {
milvus::TimeRecorder recorder("SSStructuredIndexFormat::Write");
std::string full_file_path = file_path + STRUCTURED_INDEX_POSTFIX;
......
......@@ -28,9 +28,9 @@
namespace milvus {
namespace codec {
class SSStructuredIndexFormat {
class StructuredIndexFormat {
public:
SSStructuredIndexFormat() = default;
StructuredIndexFormat() = default;
std::string
FilePostfix();
......@@ -43,20 +43,20 @@ class SSStructuredIndexFormat {
const knowhere::IndexPtr& index);
// No copy and move
SSStructuredIndexFormat(const SSStructuredIndexFormat&) = delete;
SSStructuredIndexFormat(SSStructuredIndexFormat&&) = delete;
StructuredIndexFormat(const StructuredIndexFormat&) = delete;
StructuredIndexFormat(StructuredIndexFormat&&) = delete;
SSStructuredIndexFormat&
operator=(const SSStructuredIndexFormat&) = delete;
SSStructuredIndexFormat&
operator=(SSStructuredIndexFormat&&) = delete;
StructuredIndexFormat&
operator=(const StructuredIndexFormat&) = delete;
StructuredIndexFormat&
operator=(StructuredIndexFormat&&) = delete;
private:
knowhere::IndexPtr
CreateStructuredIndex(const engine::meta::hybrid::DataType data_type);
};
using SSStructuredIndexFormatPtr = std::shared_ptr<SSStructuredIndexFormat>;
using StructuredIndexFormatPtr = std::shared_ptr<StructuredIndexFormat>;
} // namespace codec
} // namespace milvus
......@@ -18,7 +18,7 @@
#include <boost/filesystem.hpp>
#include <memory>
#include "codecs/snapshot/SSVectorCompressFormat.h"
#include "codecs/VectorCompressFormat.h"
#include "knowhere/common/BinarySet.h"
#include "utils/Exception.h"
#include "utils/Log.h"
......@@ -30,14 +30,14 @@ namespace codec {
const char* VECTOR_COMPRESS_POSTFIX = ".cmp";
std::string
SSVectorCompressFormat::FilePostfix() {
VectorCompressFormat::FilePostfix() {
std::string str = VECTOR_COMPRESS_POSTFIX;
return str;
}
void
SSVectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& compress) {
VectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& compress) {
milvus::TimeRecorder recorder("SSVectorCompressFormat::Read");
const std::string full_file_path = file_path + VECTOR_COMPRESS_POSTFIX;
......@@ -65,8 +65,8 @@ SSVectorCompressFormat::Read(const storage::FSHandlerPtr& fs_ptr, const std::str
}
void
SSVectorCompressFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::BinaryPtr& compress) {
VectorCompressFormat::Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::BinaryPtr& compress) {
milvus::TimeRecorder recorder("SSVectorCompressFormat::Write");
const std::string full_file_path = file_path + VECTOR_COMPRESS_POSTFIX;
......
......@@ -28,11 +28,25 @@ namespace codec {
class VectorCompressFormat {
public:
virtual void
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, knowhere::BinaryPtr& compress) = 0;
VectorCompressFormat() = default;
virtual void
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location, const knowhere::BinaryPtr& compress) = 0;
std::string
FilePostfix();
void
Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinaryPtr& compress);
void
Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const knowhere::BinaryPtr& compress);
// No copy and move
VectorCompressFormat(const VectorCompressFormat&) = delete;
VectorCompressFormat(VectorCompressFormat&&) = delete;
VectorCompressFormat&
operator=(const VectorCompressFormat&) = delete;
VectorCompressFormat&
operator=(VectorCompressFormat&&) = delete;
};
using VectorCompressFormatPtr = std::shared_ptr<VectorCompressFormat>;
......
......@@ -18,8 +18,8 @@
#include <boost/filesystem.hpp>
#include <memory>
#include "codecs/snapshot/SSCodec.h"
#include "codecs/snapshot/SSVectorIndexFormat.h"
#include "codecs/Codec.h"
#include "codecs/VectorIndexFormat.h"
#include "knowhere/common/BinarySet.h"
#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/vector_index/VecIndexFactory.h"
......@@ -33,14 +33,14 @@ namespace codec {
const char* VECTOR_INDEX_POSTFIX = ".idx";
std::string
SSVectorIndexFormat::FilePostfix() {
VectorIndexFormat::FilePostfix() {
std::string str = VECTOR_INDEX_POSTFIX;
return str;
}
void
SSVectorIndexFormat::ReadRaw(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& data) {
VectorIndexFormat::ReadRaw(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& data) {
milvus::TimeRecorder recorder("SSVectorIndexFormat::ReadRaw");
if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
......@@ -67,8 +67,8 @@ SSVectorIndexFormat::ReadRaw(const storage::FSHandlerPtr& fs_ptr, const std::str
}
void
SSVectorIndexFormat::ReadIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinarySet& data) {
VectorIndexFormat::ReadIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinarySet& data) {
milvus::TimeRecorder recorder("SSVectorIndexFormat::ReadIndex");
std::string full_file_path = file_path + VECTOR_INDEX_POSTFIX;
......@@ -121,23 +121,23 @@ SSVectorIndexFormat::ReadIndex(const storage::FSHandlerPtr& fs_ptr, const std::s
}
void
SSVectorIndexFormat::ReadCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& data) {
auto& ss_codec = codec::SSCodec::instance();
VectorIndexFormat::ReadCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& data) {
auto& ss_codec = codec::Codec::instance();
ss_codec.GetVectorCompressFormat()->Read(fs_ptr, file_path, data);
}
void
SSVectorIndexFormat::ConvertRaw(const std::vector<uint8_t>& raw, knowhere::BinaryPtr& data) {
VectorIndexFormat::ConvertRaw(const std::vector<uint8_t>& raw, knowhere::BinaryPtr& data) {
data = std::make_shared<knowhere::Binary>();
data->size = raw.size();
data->data = std::shared_ptr<uint8_t[]>(new uint8_t[data->size]);
}
void
SSVectorIndexFormat::ConstructIndex(const std::string& index_name, knowhere::BinarySet& index_data,
knowhere::BinaryPtr& raw_data, knowhere::BinaryPtr& compress_data,
knowhere::VecIndexPtr& index) {
VectorIndexFormat::ConstructIndex(const std::string& index_name, knowhere::BinarySet& index_data,
knowhere::BinaryPtr& raw_data, knowhere::BinaryPtr& compress_data,
knowhere::VecIndexPtr& index) {
knowhere::VecIndexFactory& vec_index_factory = knowhere::VecIndexFactory::GetInstance();
index = vec_index_factory.CreateVecIndex(index_name, knowhere::IndexMode::MODE_CPU);
if (index != nullptr) {
......@@ -169,8 +169,8 @@ SSVectorIndexFormat::ConstructIndex(const std::string& index_name, knowhere::Bin
}
void
SSVectorIndexFormat::WriteIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::VecIndexPtr& index) {
VectorIndexFormat::WriteIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::VecIndexPtr& index) {
milvus::TimeRecorder recorder("SVectorIndexFormat::WriteIndex");
std::string full_file_path = file_path + VECTOR_INDEX_POSTFIX;
......@@ -200,15 +200,15 @@ SSVectorIndexFormat::WriteIndex(const storage::FSHandlerPtr& fs_ptr, const std::
}
void
SSVectorIndexFormat::WriteCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::VecIndexPtr& index) {
VectorIndexFormat::WriteCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::VecIndexPtr& index) {
milvus::TimeRecorder recorder("SSVectorIndexFormat::WriteCompress");
auto binaryset = index->Serialize(knowhere::Config());
auto sq8_data = binaryset.Erase(SQ8_DATA);
if (sq8_data != nullptr) {
auto& ss_codec = codec::SSCodec::instance();
auto& ss_codec = codec::Codec::instance();
ss_codec.GetVectorCompressFormat()->Write(fs_ptr, file_path, sq8_data);
}
}
......
......@@ -19,24 +19,52 @@
#include <memory>
#include <string>
#include <vector>
#include "segment/VectorIndex.h"
#include "knowhere/index/vector_index/VecIndex.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
enum ExternalData { ExternalData_None, ExternalData_RawData, ExternalData_SQ8 };
class VectorIndexFormat {
public:
virtual void
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, ExternalData external_data,
segment::VectorIndexPtr& vector_index) = 0;
VectorIndexFormat() = default;
std::string
FilePostfix();
void
ReadRaw(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinaryPtr& data);
void
ReadIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinarySet& data);
void
ReadCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinaryPtr& data);
void
ConvertRaw(const std::vector<uint8_t>& raw, knowhere::BinaryPtr& data);
void
ConstructIndex(const std::string& index_name, knowhere::BinarySet& index_data, knowhere::BinaryPtr& raw_data,
knowhere::BinaryPtr& compress_data, knowhere::VecIndexPtr& index);
void
WriteIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const knowhere::VecIndexPtr& index);
void
WriteCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const knowhere::VecIndexPtr& index);
// No copy and move
VectorIndexFormat(const VectorIndexFormat&) = delete;
VectorIndexFormat(VectorIndexFormat&&) = delete;
virtual void
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) = 0;
VectorIndexFormat&
operator=(const VectorIndexFormat&) = delete;
VectorIndexFormat&
operator=(VectorIndexFormat&&) = delete;
};
using VectorIndexFormatPtr = std::shared_ptr<VectorIndexFormat>;
......
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <vector>
#include "index/knowhere/knowhere/common/BinarySet.h"
#include "segment/Vectors.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Abstract codec interface for a segment's raw vector data.
//
// Every operation receives the storage::FSHandler to perform I/O through. The on-disk layout is
// defined by the concrete implementation, not by this interface.
class VectorsFormat {
 public:
    // Virtual so that an implementation held through a VectorsFormat pointer/reference is destroyed correctly.
    virtual ~VectorsFormat() = default;

    // Reads vector data through fs_ptr and returns it in vectors_read.
    virtual void
    read(const storage::FSHandlerPtr& fs_ptr, segment::VectorsPtr& vectors_read) = 0;

    // Writes the vector data in vectors through fs_ptr.
    virtual void
    write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorsPtr& vectors) = 0;

    // Reads the entity ids through fs_ptr into uids.
    virtual void
    read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<segment::doc_id_t>& uids) = 0;

    // Reads raw vector data through fs_ptr into a knowhere binary blob.
    virtual void
    read_vectors(const storage::FSHandlerPtr& fs_ptr, knowhere::BinaryPtr& raw_vectors) = 0;

    // Reads a slice of raw vector bytes into raw_vectors.
    // offset and num_bytes select the slice; how they are interpreted relative to the stored data
    // is up to the implementation.
    virtual void
    read_vectors(const storage::FSHandlerPtr& fs_ptr, off_t offset, size_t num_bytes,
                 std::vector<uint8_t>& raw_vectors) = 0;
};

using VectorsFormatPtr = std::shared_ptr<VectorsFormat>;
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codecs/default/DefaultAttrsFormat.h"
#include <fcntl.h>
#include <fiu-local.h>
#include <unistd.h>
#include <algorithm>
#include <memory>
#include <boost/filesystem.hpp>
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
namespace milvus {
namespace codec {
// Reads a slice of the attribute payload stored in `file_path`.
//
// The file is consumed as a leading size_t holding the payload byte count (returned through
// `nbytes`), followed by the payload itself. `offset` is relative to the start of the payload
// and `num` is the maximum number of bytes wanted; the request is clamped to what the payload
// still holds after `offset`. The bytes read are returned through `raw_attrs`.
//
// An `offset` that is negative or lies beyond the payload yields an empty `raw_attrs`.
// Throws Exception(SERVER_CANNOT_CREATE_FILE) when the file cannot be opened.
void
DefaultAttrsFormat::read_attrs_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, off_t offset,
                                        size_t num, std::vector<uint8_t>& raw_attrs, size_t& nbytes) {
    auto open_res = fs_ptr->reader_ptr_->open(file_path.c_str());
    fiu_do_on("read_attrs_internal_open_file_fail", open_res = false);
    if (!open_res) {
        std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }

    fs_ptr->reader_ptr_->read(&nbytes, sizeof(size_t));

    // `nbytes - offset` is unsigned arithmetic: an offset outside [0, nbytes] would wrap around to a
    // huge value, the clamp below would then keep the caller's `num`, and the read would run past
    // the end of the payload. Reject such offsets up front.
    if (offset < 0 || static_cast<size_t>(offset) > nbytes) {
        LOG_ENGINE_ERROR_ << "Attribute read offset " << offset << " is outside the payload of file: " << file_path;
        raw_attrs.clear();
        fs_ptr->reader_ptr_->close();
        return;
    }

    num = std::min(num, nbytes - static_cast<size_t>(offset));

    // Skip the size header to reach the requested position inside the payload.
    offset += sizeof(size_t);
    fs_ptr->reader_ptr_->seekg(offset);

    raw_attrs.resize(num / sizeof(uint8_t));
    fs_ptr->reader_ptr_->read(raw_attrs.data(), num);

    fs_ptr->reader_ptr_->close();
}
// Loads the ids stored in `file_path` into `uids`.
//
// The file is consumed as a leading size_t holding the payload byte count, followed by that
// many bytes of int64_t ids. Throws Exception(SERVER_CANNOT_CREATE_FILE) when the file cannot
// be opened.
void
DefaultAttrsFormat::read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                                       std::vector<int64_t>& uids) {
    auto open_res = fs_ptr->reader_ptr_->open(file_path.c_str());
    fiu_do_on("read_uids_internal_open_file_fail", open_res = false);
    if (!open_res) {
        std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }

    size_t num_bytes = 0;
    fs_ptr->reader_ptr_->read(&num_bytes, sizeof(size_t));

    // Read the payload exactly once: a second read would continue past the ids and overwrite
    // what was just loaded.
    uids.resize(num_bytes / sizeof(int64_t));
    fs_ptr->reader_ptr_->read(uids.data(), num_bytes);

    // Release the reader, mirroring read_attrs_internal().
    fs_ptr->reader_ptr_->close();
}
// Loads every attribute file found in the handler's directory into `attrs_read`.
//
// The first file carrying the user-id extension supplies the ids shared by all attributes.
// Each file carrying the raw-attribute extension is read in full and registered under its
// file name with the extension stripped.
// Throws Exception(SERVER_INVALID_ARGUMENT) when the directory does not exist.
void
DefaultAttrsFormat::read(const milvus::storage::FSHandlerPtr& fs_ptr, milvus::segment::AttrsPtr& attrs_read) {
    using DirIter = boost::filesystem::directory_iterator;

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();

    auto is_directory = boost::filesystem::is_directory(dir_path);
    fiu_do_on("read_id_directory_false", is_directory = false);
    if (!is_directory) {
        std::string err_msg = "Directory: " + dir_path + "does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path segment_dir(dir_path);
    DirIter end_iter;

    // Pass 1: locate the id file; only the first match is used.
    DirIter uid_iter(segment_dir);
    std::vector<int64_t> uids;
    while (uid_iter != end_iter) {
        const auto& candidate = uid_iter->path();
        if (candidate.extension().string() == user_id_extension_) {
            read_uids_internal(fs_ptr, candidate.string(), uids);
            break;
        }
        ++uid_iter;
    }

    // Pass 2: load each raw attribute file completely (offset 0, no upper bound on the size).
    for (DirIter attr_iter(segment_dir); attr_iter != end_iter; ++attr_iter) {
        const auto& candidate = attr_iter->path();
        if (candidate.extension().string() != raw_attr_extension_) {
            continue;
        }

        auto file_name = candidate.filename().string();
        auto field_name = file_name.substr(0, file_name.size() - raw_attr_extension_.size());

        std::vector<uint8_t> attr_data;
        size_t nbytes;
        read_attrs_internal(fs_ptr, candidate.string(), 0, INT64_MAX, attr_data, nbytes);

        milvus::segment::AttrPtr attr =
            std::make_shared<milvus::segment::Attr>(attr_data, nbytes, uids, field_name);
        attrs_read->attrs.insert(std::make_pair(field_name, attr));
    }
}
// Writes every attribute in `attrs_ptr` to its own file inside the handler's directory.
//
// Each file is named after the attribute plus the raw-attribute extension and holds a leading
// size_t byte count followed by the attribute data. An empty attribute map is a no-op.
// Throws Exception(SERVER_CANNOT_CREATE_FILE) when a file cannot be opened and
// Exception(SERVER_WRITE_ERROR) when writing or closing fails.
void
DefaultAttrsFormat::write(const milvus::storage::FSHandlerPtr& fs_ptr, const milvus::segment::AttrsPtr& attrs_ptr) {
    // Closes the descriptor when an exception unwinds while a file is still open, so that the
    // throwing paths below do not leak it.
    struct FdGuard {
        int fd;
        ~FdGuard() {
            if (fd != -1) {
                ::close(fd);
            }
        }
    };

    TimeRecorder rc("write attributes");

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();

    auto it = attrs_ptr->attrs.begin();
    if (it == attrs_ptr->attrs.end()) {
        // std::string err_msg = "Attributes is null";
        // LOG_ENGINE_ERROR_ << err_msg;
        return;
    }

#if 0
    const std::string uid_file_path = dir_path + "/" + it->second->GetCollectionId() + user_id_extension_;

    int uid_fd = open(uid_file_path.c_str(), O_WRONLY | O_TRUNC | O_CREAT, 00664);
    if (uid_fd == -1) {
        std::string err_msg = "Failed to open file: " + uid_file_path + ", error: " + std::strerror(errno);
        ENGINE_LOG_ERROR << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }
    size_t uid_num_bytes = it->second->GetUids().size() * sizeof(int64_t);
    if (::write(uid_fd, &uid_num_bytes, sizeof(size_t)) == -1) {
        std::string err_msg = "Failed to write to file" + uid_file_path + ", error: " + std::strerror(errno);
        ENGINE_LOG_ERROR << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
    if (::write(uid_fd, it->second->GetUids().data(), uid_num_bytes) == -1) {
        std::string err_msg = "Failed to write to file" + uid_file_path + ", error: " + std::strerror(errno);
        ENGINE_LOG_ERROR << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
    if (::close(uid_fd) == -1) {
        std::string err_msg = "Failed to close file: " + uid_file_path + ", error: " + std::strerror(errno);
        ENGINE_LOG_ERROR << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
    rc.RecordSection("write uids done");
#endif

    for (; it != attrs_ptr->attrs.end(); ++it) {
        const std::string ra_file_path = dir_path + "/" + it->second->GetName() + raw_attr_extension_;

        int ra_fd = open(ra_file_path.c_str(), O_WRONLY | O_TRUNC | O_CREAT, 00664);
        if (ra_fd == -1) {
            std::string err_msg = "Failed to open file: " + ra_file_path + ", error: " + std::strerror(errno);
            LOG_ENGINE_ERROR_ << err_msg;
            throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
        }
        FdGuard guard{ra_fd};

        // Size header first, then the attribute data.
        size_t ra_num_bytes = it->second->GetNbytes();
        if (::write(ra_fd, &ra_num_bytes, sizeof(size_t)) == -1) {
            std::string err_msg = "Failed to write to file: " + ra_file_path + ", error: " + std::strerror(errno);
            LOG_ENGINE_ERROR_ << err_msg;
            throw Exception(SERVER_WRITE_ERROR, err_msg);
        }
        if (::write(ra_fd, it->second->GetData().data(), ra_num_bytes) == -1) {
            std::string err_msg = "Failed to write to file: " + ra_file_path + ", error: " + std::strerror(errno);
            LOG_ENGINE_ERROR_ << err_msg;
            throw Exception(SERVER_WRITE_ERROR, err_msg);
        }

        // The explicit close below reports its own failure, so the guard must not close again.
        guard.fd = -1;
        if (::close(ra_fd) == -1) {
            std::string err_msg = "Failed to close file: " + ra_file_path + ", error: " + std::strerror(errno);
            LOG_ENGINE_ERROR_ << err_msg;
            throw Exception(SERVER_WRITE_ERROR, err_msg);
        }

        rc.RecordSection("write rv done");
    }
}
// Reads part of the raw attribute data of `field_name` from the handler's directory.
//
// Every directory entry that carries the raw-attribute extension and whose file name, minus
// that extension, equals `field_name` is read via read_attrs_internal() with the given
// `offset` / `num_bytes`; the bytes are returned through `raw_attrs`.
// Throws Exception(SERVER_INVALID_ARGUMENT) when the directory does not exist.
void
DefaultAttrsFormat::read_attrs(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& field_name, off_t offset,
                               size_t num_bytes, std::vector<uint8_t>& raw_attrs) {
    using DirIter = boost::filesystem::directory_iterator;

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();

    const bool directory_found = boost::filesystem::is_directory(dir_path);
    if (!directory_found) {
        std::string err_msg = "Directory: " + dir_path + "does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path segment_dir(dir_path);
    DirIter end_iter;
    DirIter entry(segment_dir);
    while (entry != end_iter) {
        const auto& candidate = entry->path();
        std::string file_name = candidate.filename().string();

        // The name is only compared once the extension matched.
        bool wanted = false;
        if (candidate.extension().string() == raw_attr_extension_) {
            wanted = file_name.substr(0, file_name.size() - raw_attr_extension_.size()) == field_name;
        }

        if (wanted) {
            size_t nbytes;
            read_attrs_internal(fs_ptr, candidate.string(), offset, num_bytes, raw_attrs, nbytes);
        }
        ++entry;
    }
}
// Reads the ids stored in the handler's directory into `uids`.
//
// Every directory entry carrying the user-id extension is passed to read_uids_internal().
// Throws Exception(SERVER_INVALID_ARGUMENT) when the directory does not exist.
void
DefaultAttrsFormat::read_uids(const milvus::storage::FSHandlerPtr& fs_ptr, std::vector<int64_t>& uids) {
    using DirIter = boost::filesystem::directory_iterator;

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();

    auto is_directory = boost::filesystem::is_directory(dir_path);
    fiu_do_on("is_directory_false", is_directory = false);
    if (!is_directory) {
        std::string err_msg = "Directory: " + dir_path + "does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path segment_dir(dir_path);
    DirIter end_iter;
    DirIter entry(segment_dir);
    while (entry != end_iter) {
        const auto& candidate = entry->path();
        const bool is_uid_file = candidate.extension().string() == user_id_extension_;
        if (is_uid_file) {
            read_uids_internal(fs_ptr, candidate.string(), uids);
        }
        ++entry;
    }
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include <vector>
#include "codecs/AttrsFormat.h"
#include "segment/Attrs.h"
namespace milvus {
namespace codec {
// AttrsFormat implementation that works on files identified by the ".ra" (raw attribute)
// and ".uid" (user id) extensions.
class DefaultAttrsFormat : public AttrsFormat {
 public:
    DefaultAttrsFormat() = default;

    // Neither copyable nor movable.
    DefaultAttrsFormat(const DefaultAttrsFormat&) = delete;
    DefaultAttrsFormat(DefaultAttrsFormat&&) = delete;

    DefaultAttrsFormat&
    operator=(const DefaultAttrsFormat&) = delete;

    DefaultAttrsFormat&
    operator=(DefaultAttrsFormat&&) = delete;

    // AttrsFormat interface.
    void
    read(const storage::FSHandlerPtr& fs_ptr, segment::AttrsPtr& attrs_read) override;

    void
    write(const storage::FSHandlerPtr& fs_ptr, const segment::AttrsPtr& attr) override;

    void
    read_attrs(const storage::FSHandlerPtr& fs_ptr, const std::string& field_name, off_t offset, size_t num_bytes,
               std::vector<uint8_t>& raw_attrs) override;

    void
    read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<int64_t>& uids) override;

 private:
    // Single-file helpers used by the public read operations.
    void
    read_attrs_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, off_t offset, size_t num,
                        std::vector<uint8_t>& raw_attrs, size_t& nbytes);

    void
    read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, std::vector<int64_t>& uids);

    // File extensions used to recognise attribute data and id files.
    const std::string raw_attr_extension_ = ".ra";
    const std::string user_id_extension_ = ".uid";
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codecs/default/DefaultAttrsIndexFormat.h"
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <boost/filesystem.hpp>
#include <memory>
#include <utility>
#include "db/meta/MetaTypes.h"
#include "knowhere/index/structured_index/StructuredIndexSort.h"
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
namespace milvus {
namespace codec {
// Creates an empty StructuredIndexSort whose element type corresponds to `data_type`.
// Returns nullptr, after logging an error, for any data type not listed below.
knowhere::IndexPtr
DefaultAttrsIndexFormat::create_structured_index(const milvus::engine::meta::hybrid::DataType data_type) {
    using FieldType = engine::meta::hybrid::DataType;

    switch (data_type) {
        case FieldType::INT8:
            return std::make_shared<knowhere::StructuredIndexSort<int8_t>>();
        case FieldType::INT16:
            return std::make_shared<knowhere::StructuredIndexSort<int16_t>>();
        case FieldType::INT32:
            return std::make_shared<knowhere::StructuredIndexSort<int32_t>>();
        case FieldType::INT64:
            return std::make_shared<knowhere::StructuredIndexSort<int64_t>>();
        case FieldType::FLOAT:
            return std::make_shared<knowhere::StructuredIndexSort<float>>();
        case FieldType::DOUBLE:
            return std::make_shared<knowhere::StructuredIndexSort<double>>();
        default:
            LOG_ENGINE_ERROR_ << "Invalid field type";
            return nullptr;
    }
}
// Loads one attribute index file.
//
// The file is consumed as an int32_t data type tag followed by records of the form
// [size_t meta length][meta bytes][size_t binary length][binary bytes] until the reported file
// length is reached. The tag is returned through `attr_type`; the records are collected into
// a BinarySet and loaded into a freshly created structured index returned through `index`.
//
// On any failure (file cannot be opened, non-positive length, unsupported data type) an error
// is logged and the function returns without assigning `index`.
void
DefaultAttrsIndexFormat::read_internal(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& path,
                                       knowhere::IndexPtr& index, engine::meta::hybrid::DataType& attr_type) {
    milvus::TimeRecorder recorder("read_index");
    knowhere::BinarySet load_data_list;

    recorder.RecordSection("Start");
    if (!fs_ptr->reader_ptr_->open(path)) {
        LOG_ENGINE_ERROR_ << "Fail to open attribute index: " << path;
        return;
    }

    int64_t length = fs_ptr->reader_ptr_->length();
    if (length <= 0) {
        LOG_ENGINE_ERROR_ << "Invalid attr index length: " << path;
        // The reader was opened successfully above, so release it before bailing out.
        fs_ptr->reader_ptr_->close();
        return;
    }
    // Known to be positive here; compared against the unsigned read position below.
    const size_t file_length = static_cast<size_t>(length);

    size_t rp = 0;
    fs_ptr->reader_ptr_->seekg(0);

    int32_t data_type = 0;
    fs_ptr->reader_ptr_->read(&data_type, sizeof(data_type));
    rp += sizeof(data_type);
    fs_ptr->reader_ptr_->seekg(rp);

    attr_type = (engine::meta::hybrid::DataType)data_type;

    LOG_ENGINE_DEBUG_ << "Start to read_index(" << path << ") length: " << length << " bytes";
    while (rp < file_length) {
        size_t meta_length = 0;
        fs_ptr->reader_ptr_->read(&meta_length, sizeof(meta_length));
        rp += sizeof(meta_length);
        fs_ptr->reader_ptr_->seekg(rp);

        // Owning buffers instead of raw new[]/delete[]: nothing leaks if a read throws.
        std::string meta(meta_length, '\0');
        fs_ptr->reader_ptr_->read(&meta[0], meta_length);
        rp += meta_length;
        fs_ptr->reader_ptr_->seekg(rp);

        size_t bin_length = 0;
        fs_ptr->reader_ptr_->read(&bin_length, sizeof(bin_length));
        rp += sizeof(bin_length);
        fs_ptr->reader_ptr_->seekg(rp);

        std::shared_ptr<uint8_t[]> binptr(new uint8_t[bin_length]);
        fs_ptr->reader_ptr_->read(binptr.get(), bin_length);
        rp += bin_length;
        fs_ptr->reader_ptr_->seekg(rp);

        load_data_list.Append(meta, binptr, bin_length);
    }
    fs_ptr->reader_ptr_->close();

    double span = recorder.RecordSection("End");
    double rate = length * 1000000.0 / span / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "read_index(" << path << ") rate " << rate << "MB/s";

    // create_structured_index() yields nullptr for an unsupported tag; do not dereference it.
    knowhere::IndexPtr created_index = create_structured_index((engine::meta::hybrid::DataType)data_type);
    if (created_index == nullptr) {
        LOG_ENGINE_ERROR_ << "Unsupported data type in attribute index: " << path;
        return;
    }
    created_index->Load(load_data_list);
    index = created_index;
}
// Loads every attribute index file found in the handler's directory into `attrs_index`.
//
// Each file carrying the attribute-index extension is read with read_internal() and registered
// under its file name with the extension stripped.
// Throws Exception(SERVER_INVALID_ARGUMENT) when the directory does not exist.
void
DefaultAttrsIndexFormat::read(const milvus::storage::FSHandlerPtr& fs_ptr,
                              milvus::segment::AttrsIndexPtr& attrs_index) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        std::string err_msg = "Directory: " + dir_path + "does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path target_path(dir_path);
    using d_it = boost::filesystem::directory_iterator;
    d_it it_end;
    for (d_it it(target_path); it != it_end; ++it) {
        const auto& path = it->path();
        if (path.extension().string() != attr_index_extension_) {
            continue;
        }

        auto file_name = path.filename().string();
        // Strip the extension by its real length rather than a hard-coded count.
        auto field_name = file_name.substr(0, file_name.size() - attr_index_extension_.size());

        knowhere::IndexPtr index = nullptr;
        // Value-initialised: read_internal() can return early without assigning the type, and the
        // value is read unconditionally below.
        engine::meta::hybrid::DataType data_type{};
        read_internal(fs_ptr, path.string(), index, data_type);

        // NOTE(review): `index` is still nullptr here when read_internal() failed; the entry is
        // registered regardless, as before.
        auto attr_index = std::make_shared<milvus::segment::AttrIndex>(index, data_type, field_name);
        attrs_index->attr_indexes.insert(std::make_pair(field_name, attr_index));
    }
}
// Serialises every attribute index in `attrs_index` to its own file in the handler's directory.
//
// Each file is named after the field plus the attribute-index extension and holds an int32_t
// data type tag followed by one [size_t meta length][meta][int64_t binary length][binary]
// record per entry of the serialised BinarySet.
//
// If a file cannot be opened an error is logged and the remaining fields are not written.
void
DefaultAttrsIndexFormat::write(const milvus::storage::FSHandlerPtr& fs_ptr,
                               const milvus::segment::AttrsIndexPtr& attrs_index) {
    milvus::TimeRecorder recorder("write_index");
    recorder.RecordSection("Start");

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();

    // Bytes written over all files, used only for the throughput log line.
    double total_bytes = 0;

    for (auto attr_it = attrs_index->attr_indexes.begin(); attr_it != attrs_index->attr_indexes.end(); ++attr_it) {
        const auto& field_name = attr_it->first;
        const std::string file_path = dir_path + "/" + field_name + attr_index_extension_;

        knowhere::IndexPtr index = attr_it->second->GetAttrIndex();
        int32_t data_type = (int32_t)attr_it->second->GetDataType();
        auto binaryset = index->Serialize(knowhere::Config());

        if (!fs_ptr->writer_ptr_->open(file_path)) {
            LOG_ENGINE_ERROR_ << "Fail to open attribute index: " << file_path;
            return;
        }

        fs_ptr->writer_ptr_->write(&data_type, sizeof(data_type));

        for (auto& iter : binaryset.binary_map_) {
            auto meta = iter.first.c_str();
            size_t meta_length = iter.first.length();
            fs_ptr->writer_ptr_->write(&meta_length, sizeof(meta_length));
            fs_ptr->writer_ptr_->write((void*)meta, meta_length);

            const auto& binary = iter.second;
            int64_t binary_length = binary->size;
            fs_ptr->writer_ptr_->write(&binary_length, sizeof(binary_length));
            fs_ptr->writer_ptr_->write((void*)binary->data.get(), binary_length);
        }

        // Close each file before the next one is opened: the writer is opened once per field, so
        // a single close after the loop would leave all but the last file unclosed (and would
        // close a never-opened writer when there are no fields at all).
        fs_ptr->writer_ptr_->close();
        total_bytes += static_cast<double>(fs_ptr->writer_ptr_->length());
    }

    double span = recorder.RecordSection("End");
    double rate = total_bytes * 1000000.0 / span / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "write_index(" << dir_path << ") rate " << rate << "MB/s";
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <src/db/meta/MetaTypes.h>
#include <string>
#include <vector>
#include "codecs/AttrsIndexFormat.h"
#include "segment/AttrsIndex.h"
namespace milvus {
namespace codec {
// AttrsIndexFormat implementation that works on files identified by the ".idx" extension.
class DefaultAttrsIndexFormat : public AttrsIndexFormat {
 public:
    DefaultAttrsIndexFormat() = default;

    // Neither copyable nor movable.
    DefaultAttrsIndexFormat(const DefaultAttrsIndexFormat&) = delete;
    DefaultAttrsIndexFormat(DefaultAttrsIndexFormat&&) = delete;

    DefaultAttrsIndexFormat&
    operator=(const DefaultAttrsIndexFormat&) = delete;

    DefaultAttrsIndexFormat&
    operator=(DefaultAttrsIndexFormat&&) = delete;

    // AttrsIndexFormat interface.
    void
    read(const storage::FSHandlerPtr& fs_ptr, segment::AttrsIndexPtr& attr_index) override;

    void
    write(const storage::FSHandlerPtr& fs_ptr, const segment::AttrsIndexPtr& attr_index) override;

 private:
    // Reads a single index file located at `path`.
    void
    read_internal(const milvus::storage::FSHandlerPtr& fs_ptr, const std::string& path, knowhere::IndexPtr& index,
                  engine::meta::hybrid::DataType& attr_type);

    // Builds an empty index object matching `data_type`.
    knowhere::IndexPtr
    create_structured_index(const engine::meta::hybrid::DataType data_type);

    // File extension used to recognise attribute index files.
    const std::string attr_index_extension_ = ".idx";
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codecs/default/DefaultCodec.h"
#include <memory>
#include "DefaultAttrsFormat.h"
#include "DefaultAttrsIndexFormat.h"
#include "DefaultDeletedDocsFormat.h"
#include "DefaultIdBloomFilterFormat.h"
#include "DefaultVectorCompressFormat.h"
#include "DefaultVectorIndexFormat.h"
#include "DefaultVectorsFormat.h"
namespace milvus {
namespace codec {
// Returns a reference to the single DefaultCodec object, which is held in a function-local
// static and therefore constructed the first time this function is called.
DefaultCodec&
DefaultCodec::instance() {
static DefaultCodec s_instance;
return s_instance;
}
// Creates one default implementation for each format the codec hands out.
DefaultCodec::DefaultCodec()
    : vectors_format_ptr_(std::make_shared<DefaultVectorsFormat>()),
      attrs_format_ptr_(std::make_shared<DefaultAttrsFormat>()),
      vector_index_format_ptr_(std::make_shared<DefaultVectorIndexFormat>()),
      attrs_index_format_ptr_(std::make_shared<DefaultAttrsIndexFormat>()),
      deleted_docs_format_ptr_(std::make_shared<DefaultDeletedDocsFormat>()),
      id_bloom_filter_format_ptr_(std::make_shared<DefaultIdBloomFilterFormat>()),
      vector_compress_format_ptr_(std::make_shared<DefaultVectorCompressFormat>()) {
}
// Accessors: each one returns the shared pointer to the format object created in the
// DefaultCodec constructor.

// Format for raw vectors.
VectorsFormatPtr
DefaultCodec::GetVectorsFormat() {
return vectors_format_ptr_;
}
// Format for attributes.
AttrsFormatPtr
DefaultCodec::GetAttrsFormat() {
return attrs_format_ptr_;
}
// Format for vector indexes.
VectorIndexFormatPtr
DefaultCodec::GetVectorIndexFormat() {
return vector_index_format_ptr_;
}
// Format for attribute indexes.
AttrsIndexFormatPtr
DefaultCodec::GetAttrsIndexFormat() {
return attrs_index_format_ptr_;
}
// Format for deleted docs.
DeletedDocsFormatPtr
DefaultCodec::GetDeletedDocsFormat() {
return deleted_docs_format_ptr_;
}
// Format for the id bloom filter.
IdBloomFilterFormatPtr
DefaultCodec::GetIdBloomFilterFormat() {
return id_bloom_filter_format_ptr_;
}
// Format for vector compression.
VectorCompressFormatPtr
DefaultCodec::GetVectorCompressFormat() {
return vector_compress_format_ptr_;
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "codecs/Codec.h"
namespace milvus {
namespace codec {
// Codec that hands out the Default*Format implementations. The constructor is private;
// the object is obtained through instance().
class DefaultCodec : public Codec {
 public:
    // Access point for the codec object.
    static DefaultCodec&
    instance();

    // Codec interface: one accessor per format.
    VectorsFormatPtr
    GetVectorsFormat() override;

    AttrsFormatPtr
    GetAttrsFormat() override;

    VectorIndexFormatPtr
    GetVectorIndexFormat() override;

    AttrsIndexFormatPtr
    GetAttrsIndexFormat() override;

    DeletedDocsFormatPtr
    GetDeletedDocsFormat() override;

    IdBloomFilterFormatPtr
    GetIdBloomFilterFormat() override;

    VectorCompressFormatPtr
    GetVectorCompressFormat() override;

 private:
    DefaultCodec();

    // Format objects returned by the accessors above.
    VectorsFormatPtr vectors_format_ptr_;
    AttrsFormatPtr attrs_format_ptr_;
    VectorIndexFormatPtr vector_index_format_ptr_;
    AttrsIndexFormatPtr attrs_index_format_ptr_;
    DeletedDocsFormatPtr deleted_docs_format_ptr_;
    IdBloomFilterFormatPtr id_bloom_filter_format_ptr_;
    VectorCompressFormatPtr vector_compress_format_ptr_;
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codecs/default/DefaultDeletedDocsFormat.h"
#include <fcntl.h>
#include <unistd.h>
#define BOOST_NO_CXX11_SCOPED_ENUMS
#include <boost/filesystem.hpp>
#undef BOOST_NO_CXX11_SCOPED_ENUMS
#include <memory>
#include <string>
#include <vector>
#include "segment/Types.h"
#include "utils/Exception.h"
#include "utils/Log.h"
namespace milvus {
namespace codec {
// Loads the deleted-docs file of the handler's directory into `deleted_docs`.
//
// The file is consumed as a leading size_t holding the payload byte count, followed by that
// many bytes of segment::offset_t entries.
// Throws Exception(SERVER_CANNOT_CREATE_FILE) when the file cannot be opened and
// Exception(SERVER_WRITE_ERROR) when reading or closing fails.
void
DefaultDeletedDocsFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::DeletedDocsPtr& deleted_docs) {
    // Closes the descriptor when an exception unwinds while the file is still open, so that the
    // throwing paths below do not leak it.
    struct FdGuard {
        int fd;
        ~FdGuard() {
            if (fd != -1) {
                ::close(fd);
            }
        }
    };

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    const std::string del_file_path = dir_path + "/" + deleted_docs_filename_;

    int del_fd = open(del_file_path.c_str(), O_RDONLY, 00664);
    if (del_fd == -1) {
        std::string err_msg = "Failed to open file: " + del_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }
    FdGuard guard{del_fd};

    // A short read would leave the size header partly unset, so anything but a full header
    // counts as a failure.
    size_t num_bytes = 0;
    const ssize_t header_read = ::read(del_fd, &num_bytes, sizeof(size_t));
    if (header_read != static_cast<ssize_t>(sizeof(size_t))) {
        const std::string reason = (header_read == -1) ? std::strerror(errno) : "incomplete size header";
        std::string err_msg = "Failed to read from file: " + del_file_path + ", error: " + reason;
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }

    auto deleted_docs_size = num_bytes / sizeof(segment::offset_t);
    std::vector<segment::offset_t> deleted_docs_list;
    deleted_docs_list.resize(deleted_docs_size);

    if (::read(del_fd, deleted_docs_list.data(), num_bytes) == -1) {
        std::string err_msg = "Failed to read from file: " + del_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }

    deleted_docs = std::make_shared<segment::DeletedDocs>(deleted_docs_list);

    // The explicit close below reports its own failure, so the guard must not close again.
    guard.fd = -1;
    if (::close(del_fd) == -1) {
        std::string err_msg = "Failed to close file: " + del_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
}
// Appends the entries of `deleted_docs` to the deleted-docs file of the handler's directory.
//
// The work is done on a temporary copy ("temp_del") which is renamed over the real file at the
// end: the size header at the start is rewritten with the new total and the new entries are
// appended after the existing ones.
// Throws Exception(SERVER_CANNOT_CREATE_FILE) when the temporary file cannot be opened and
// Exception(SERVER_WRITE_ERROR) when reading, seeking, writing or closing fails. The
// boost::filesystem copy/rename calls may throw their own exceptions.
void
DefaultDeletedDocsFormat::write(const storage::FSHandlerPtr& fs_ptr, const segment::DeletedDocsPtr& deleted_docs) {
    // Closes the descriptor when an exception unwinds while the file is still open, so that the
    // throwing paths below do not leak it.
    struct FdGuard {
        int fd;
        ~FdGuard() {
            if (fd != -1) {
                ::close(fd);
            }
        }
    };

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    const std::string del_file_path = dir_path + "/" + deleted_docs_filename_;

    // Create a temporary file from the existing file
    const std::string temp_path = dir_path + "/" + "temp_del";
    bool exists = boost::filesystem::exists(del_file_path);
    if (exists) {
        boost::filesystem::copy_file(del_file_path, temp_path, boost::filesystem::copy_option::fail_if_exists);
    }

    // Write to the temp file, in order to avoid possible race condition with search (concurrent read and write)
    int del_fd = open(temp_path.c_str(), O_RDWR | O_CREAT, 00664);
    if (del_fd == -1) {
        std::string err_msg = "Failed to open file: " + temp_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }
    FdGuard guard{del_fd};

    // Starts at zero so that a fresh file, or a read that delivers no bytes, never leaves the
    // old size indeterminate.
    size_t old_num_bytes = 0;
    if (exists) {
        if (::read(del_fd, &old_num_bytes, sizeof(size_t)) == -1) {
            std::string err_msg = "Failed to read from file: " + temp_path + ", error: " + std::strerror(errno);
            LOG_ENGINE_ERROR_ << err_msg;
            throw Exception(SERVER_WRITE_ERROR, err_msg);
        }
    }

    const auto& deleted_docs_list = deleted_docs->GetDeletedDocs();
    const size_t appended_bytes = sizeof(segment::offset_t) * deleted_docs->GetSize();
    size_t new_num_bytes = old_num_bytes + appended_bytes;

    // rewind and overwrite with the new_num_bytes
    // (off_t, not int: lseek returns off_t and a narrower type could turn a valid offset into -1)
    off_t off = lseek(del_fd, 0, SEEK_SET);
    if (off == -1) {
        std::string err_msg = "Failed to seek file: " + temp_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
    if (::write(del_fd, &new_num_bytes, sizeof(size_t)) == -1) {
        std::string err_msg = "Failed to write to file" + temp_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }

    // Move to the end of file and append
    off = lseek(del_fd, 0, SEEK_END);
    if (off == -1) {
        std::string err_msg = "Failed to seek file: " + temp_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
    if (::write(del_fd, deleted_docs_list.data(), appended_bytes) == -1) {
        std::string err_msg = "Failed to write to file" + temp_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }

    // The explicit close below reports its own failure, so the guard must not close again.
    guard.fd = -1;
    if (::close(del_fd) == -1) {
        std::string err_msg = "Failed to close file: " + temp_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }

    // Move temp file to delete file
    // NOTE(review): a failure above leaves "temp_del" behind, and copy_file(fail_if_exists) will
    // then throw on the next call — confirm whether cleanup is handled elsewhere.
    boost::filesystem::rename(temp_path, del_file_path);
}
// Reports through `size` how many segment::offset_t entries the deleted-docs file holds,
// derived from the size_t byte count stored at the start of the file.
// Throws Exception(SERVER_CANNOT_CREATE_FILE) when the file cannot be opened and
// Exception(SERVER_WRITE_ERROR) when reading or closing fails.
void
DefaultDeletedDocsFormat::readSize(const storage::FSHandlerPtr& fs_ptr, size_t& size) {
    // Closes the descriptor when an exception unwinds while the file is still open, so that the
    // throwing paths below do not leak it.
    struct FdGuard {
        int fd;
        ~FdGuard() {
            if (fd != -1) {
                ::close(fd);
            }
        }
    };

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    const std::string del_file_path = dir_path + "/" + deleted_docs_filename_;

    int del_fd = open(del_file_path.c_str(), O_RDONLY, 00664);
    if (del_fd == -1) {
        std::string err_msg = "Failed to open file: " + del_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }
    FdGuard guard{del_fd};

    // A short read would leave the size header partly unset, so anything but a full header
    // counts as a failure.
    size_t num_bytes = 0;
    const ssize_t header_read = ::read(del_fd, &num_bytes, sizeof(size_t));
    if (header_read != static_cast<ssize_t>(sizeof(size_t))) {
        const std::string reason = (header_read == -1) ? std::strerror(errno) : "incomplete size header";
        std::string err_msg = "Failed to read from file: " + del_file_path + ", error: " + reason;
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }

    size = num_bytes / sizeof(segment::offset_t);

    // The explicit close below reports its own failure, so the guard must not close again.
    guard.fd = -1;
    if (::close(del_fd) == -1) {
        std::string err_msg = "Failed to close file: " + del_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_WRITE_ERROR, err_msg);
    }
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "codecs/DeletedDocsFormat.h"
namespace milvus {
namespace codec {
// DeletedDocsFormat implementation that works on a file named "deleted_docs".
class DefaultDeletedDocsFormat : public DeletedDocsFormat {
 public:
    DefaultDeletedDocsFormat() = default;

    // Neither copyable nor movable.
    DefaultDeletedDocsFormat(const DefaultDeletedDocsFormat&) = delete;
    DefaultDeletedDocsFormat(DefaultDeletedDocsFormat&&) = delete;

    DefaultDeletedDocsFormat&
    operator=(const DefaultDeletedDocsFormat&) = delete;

    DefaultDeletedDocsFormat&
    operator=(DefaultDeletedDocsFormat&&) = delete;

    // DeletedDocsFormat interface.
    void
    read(const storage::FSHandlerPtr& fs_ptr, segment::DeletedDocsPtr& deleted_docs) override;

    void
    write(const storage::FSHandlerPtr& fs_ptr, const segment::DeletedDocsPtr& deleted_docs) override;

    void
    readSize(const storage::FSHandlerPtr& fs_ptr, size_t& size) override;

 private:
    // Name of the deleted-docs file.
    const std::string deleted_docs_filename_ = "deleted_docs";
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codecs/default/DefaultIdBloomFilterFormat.h"
#include <fiu-local.h>
#include <memory>
#include <string>
#include "utils/Exception.h"
#include "utils/Log.h"
namespace milvus {
namespace codec {
// Capacity and error-rate arguments handed to new_scaling_bloom_from_file() and
// new_scaling_bloom() when a bloom filter is loaded or created in this file.
constexpr unsigned int bloom_filter_capacity = 500000;
constexpr double bloom_filter_error_rate = 0.01;
// Loads the bloom filter stored at "<directory>/<bloom_filter_filename_>" and
// hands it back through id_bloom_filter_ptr.
//
// Throws Exception(SERVER_UNEXPECTED_ERROR) when the filter cannot be loaded
// (or when the "bloom_filter_nullptr" fault-injection point fires).
void
DefaultIdBloomFilterFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
    std::string segment_dir = fs_ptr->operation_ptr_->GetDirectory();
    const std::string file_path = segment_dir + "/" + bloom_filter_filename_;

    scaling_bloom_t* loaded =
        new_scaling_bloom_from_file(bloom_filter_capacity, bloom_filter_error_rate, file_path.c_str());
    // Fault injection hook: lets tests force the failure branch below.
    fiu_do_on("bloom_filter_nullptr", loaded = nullptr);

    if (loaded != nullptr) {
        id_bloom_filter_ptr = std::make_shared<segment::IdBloomFilter>(loaded);
        return;
    }

    std::string err_msg = "Failed to read bloom filter from file: " + file_path + ". " + std::strerror(errno);
    LOG_ENGINE_ERROR_ << err_msg;
    throw Exception(SERVER_UNEXPECTED_ERROR, err_msg);
}
// Flushes the given bloom filter via scaling_bloom_flush().
//
// The file path is only computed for the error message; the flush itself works
// on the filter handle. Throws Exception(SERVER_UNEXPECTED_ERROR) when the
// flush reports -1.
void
DefaultIdBloomFilterFormat::write(const storage::FSHandlerPtr& fs_ptr,
                                  const segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
    std::string segment_dir = fs_ptr->operation_ptr_->GetDirectory();
    const std::string file_path = segment_dir + "/" + bloom_filter_filename_;

    const bool flush_failed = scaling_bloom_flush(id_bloom_filter_ptr->GetBloomFilter()) == -1;
    if (!flush_failed) {
        return;
    }

    std::string err_msg = "Failed to write bloom filter to file: " + file_path + ". " + std::strerror(errno);
    LOG_ENGINE_ERROR_ << err_msg;
    throw Exception(SERVER_UNEXPECTED_ERROR, err_msg);
}
// Creates a new bloom filter backed by "<directory>/<bloom_filter_filename_>"
// and returns it through id_bloom_filter_ptr.
//
// Throws Exception(SERVER_UNEXPECTED_ERROR) when new_scaling_bloom() fails.
void
DefaultIdBloomFilterFormat::create(const storage::FSHandlerPtr& fs_ptr,
                                   segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    const std::string bloom_filter_file_path = dir_path + "/" + bloom_filter_filename_;

    scaling_bloom_t* bloom_filter =
        new_scaling_bloom(bloom_filter_capacity, bloom_filter_error_rate, bloom_filter_file_path.c_str());
    if (bloom_filter == nullptr) {
        // The message names the create operation so it is not confused with a failed read().
        std::string err_msg =
            "Failed to create bloom filter file: " + bloom_filter_file_path + ". " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_UNEXPECTED_ERROR, err_msg);
    }

    id_bloom_filter_ptr = std::make_shared<segment::IdBloomFilter>(bloom_filter);
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "codecs/IdBloomFilterFormat.h"
#include "segment/IdBloomFilter.h"
#include "storage/disk/DiskOperation.h"
namespace milvus {
namespace codec {
// Default implementation of the IdBloomFilterFormat interface. The bloom filter
// lives in a file named bloom_filter_filename_ inside the directory reported by
// the FS handler's operation_ptr_.
class DefaultIdBloomFilterFormat : public IdBloomFilterFormat {
public:
DefaultIdBloomFilterFormat() = default;
// Loads the bloom filter from file into id_bloom_filter_ptr.
// Throws Exception(SERVER_UNEXPECTED_ERROR) if loading fails.
void
read(const storage::FSHandlerPtr& fs_ptr, segment::IdBloomFilterPtr& id_bloom_filter_ptr) override;
// Flushes the given bloom filter.
// Throws Exception(SERVER_UNEXPECTED_ERROR) if the flush fails.
void
write(const storage::FSHandlerPtr& fs_ptr, const segment::IdBloomFilterPtr& id_bloom_filter_ptr) override;
// Creates a new bloom filter and returns it through id_bloom_filter_ptr.
// Throws Exception(SERVER_UNEXPECTED_ERROR) if creation fails.
void
create(const storage::FSHandlerPtr& fs_ptr, segment::IdBloomFilterPtr& id_bloom_filter_ptr) override;
// No copy and move
DefaultIdBloomFilterFormat(const DefaultIdBloomFilterFormat&) = delete;
DefaultIdBloomFilterFormat(DefaultIdBloomFilterFormat&&) = delete;
DefaultIdBloomFilterFormat&
operator=(const DefaultIdBloomFilterFormat&) = delete;
DefaultIdBloomFilterFormat&
operator=(DefaultIdBloomFilterFormat&&) = delete;
private:
// Name of the bloom filter file, appended to "<directory>/".
const std::string bloom_filter_filename_ = "bloom_filter";
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <boost/filesystem.hpp>
#include <memory>
#include "codecs/default/DefaultVectorCompressFormat.h"
#include "knowhere/common/BinarySet.h"
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
namespace milvus {
namespace codec {
// Reads the whole compressed-vector file "<location><sq8_vector_extension_>"
// into a freshly allocated knowhere::Binary and returns it through `compress`.
//
// On failure (file cannot be opened, or its length is not positive) an error is
// logged and `compress` is left untouched; no exception is thrown here.
void
DefaultVectorCompressFormat::read(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
                                  knowhere::BinaryPtr& compress) {
    const std::string compress_file_path = location + sq8_vector_extension_;

    milvus::TimeRecorder recorder("read_index");

    recorder.RecordSection("Start");
    if (!fs_ptr->reader_ptr_->open(compress_file_path)) {
        LOG_ENGINE_ERROR_ << "Fail to open vector compress: " << compress_file_path;
        return;
    }

    int64_t length = fs_ptr->reader_ptr_->length();
    if (length <= 0) {
        // The file was opened successfully above, so release the reader before bailing out.
        fs_ptr->reader_ptr_->close();
        LOG_ENGINE_ERROR_ << "Invalid vector compress length: " << compress_file_path;
        return;
    }

    compress = std::make_shared<knowhere::Binary>();
    compress->data = std::shared_ptr<uint8_t[]>(new uint8_t[length]);
    compress->size = length;

    fs_ptr->reader_ptr_->seekg(0);
    fs_ptr->reader_ptr_->read(compress->data.get(), length);
    fs_ptr->reader_ptr_->close();

    // Throughput is reported for diagnostics only.
    double span = recorder.RecordSection("End");
    double rate = length * 1000000.0 / span / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "read_compress(" << compress_file_path << ") rate " << rate << "MB/s";
}
// Writes the bytes held by `compress` to "<location><sq8_vector_extension_>".
//
// If the file cannot be opened an error is logged and nothing is written.
void
DefaultVectorCompressFormat::write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
                                   const knowhere::BinaryPtr& compress) {
    const std::string target_path = location + sq8_vector_extension_;

    milvus::TimeRecorder timer("write_index");
    timer.RecordSection("Start");

    auto& writer = fs_ptr->writer_ptr_;
    const bool opened = writer->open(target_path);
    if (!opened) {
        LOG_ENGINE_ERROR_ << "Fail to open vector compress: " << target_path;
        return;
    }

    writer->write(compress->data.get(), compress->size);
    writer->close();

    // Throughput is reported for diagnostics only.
    double elapsed = timer.RecordSection("End");
    double mb_per_sec = compress->size * 1000000.0 / elapsed / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "write_compress(" << target_path << ") rate " << mb_per_sec << "MB/s";
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "codecs/VectorCompressFormat.h"
namespace milvus {
namespace codec {
// Default implementation of the VectorCompressFormat interface. The compressed
// data is stored in a file whose path is `location` + sq8_vector_extension_.
class DefaultVectorCompressFormat : public VectorCompressFormat {
public:
DefaultVectorCompressFormat() = default;
// Reads the whole compress file into a new knowhere::Binary returned through
// `compress`. Failures are logged rather than thrown.
void
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, knowhere::BinaryPtr& compress) override;
// Writes the bytes of `compress` to the compress file. A failure to open the
// file is logged rather than thrown.
void
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const knowhere::BinaryPtr& compress) override;
// No copy and move
DefaultVectorCompressFormat(const DefaultVectorCompressFormat&) = delete;
DefaultVectorCompressFormat(DefaultVectorCompressFormat&&) = delete;
DefaultVectorCompressFormat&
operator=(const DefaultVectorCompressFormat&) = delete;
DefaultVectorCompressFormat&
operator=(DefaultVectorCompressFormat&&) = delete;
private:
// Suffix appended to `location` to form the compress file path.
const std::string sq8_vector_extension_ = ".sq8";
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <boost/filesystem.hpp>
#include <memory>
#include "codecs/default/DefaultCodec.h"
#include "codecs/default/DefaultVectorIndexFormat.h"
#include "knowhere/common/BinarySet.h"
#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/vector_index/VecIndexFactory.h"
#include "segment/VectorIndex.h"
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
namespace milvus {
namespace codec {
// Reads a serialized vector index from `path` and rebuilds it.
//
// File layout (as produced by write()): an int32 index type, followed by
// records of { key length, key bytes, payload length, payload bytes } until the
// end of the file.
//
// extern_key / extern_data: optional extra binary appended to the loaded set
// before the index is loaded (used for raw vectors or SQ8 data stored
// in a separate file).
//
// Returns the loaded index, or nullptr (after logging) when the file cannot be
// opened, has a non-positive length, declares a record that does not fit in
// the file, or when the index object cannot be created.
knowhere::VecIndexPtr
DefaultVectorIndexFormat::read_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& path,
                                        const std::string& extern_key, const knowhere::BinaryPtr& extern_data) {
    milvus::TimeRecorder recorder("read_index");
    knowhere::BinarySet load_data_list;

    recorder.RecordSection("Start");
    if (!fs_ptr->reader_ptr_->open(path)) {
        LOG_ENGINE_ERROR_ << "Fail to open vector index: " << path;
        return nullptr;
    }

    int64_t length = fs_ptr->reader_ptr_->length();
    if (length <= 0) {
        // The file was opened successfully above, so release the reader before bailing out.
        fs_ptr->reader_ptr_->close();
        LOG_ENGINE_ERROR_ << "Invalid vector index length: " << path;
        return nullptr;
    }

    int64_t rp = 0;
    fs_ptr->reader_ptr_->seekg(0);

    int32_t current_type = 0;
    fs_ptr->reader_ptr_->read(&current_type, sizeof(current_type));
    rp += sizeof(current_type);
    fs_ptr->reader_ptr_->seekg(rp);

    // True when `count` more bytes starting at the current read position would
    // run past the end of the file, i.e. the length field is corrupted.
    auto overruns_file = [&rp, length](size_t count) {
        return rp > length || count > static_cast<size_t>(length - rp);
    };

    LOG_ENGINE_DEBUG_ << "Start to read_index(" << path << ") length: " << length << " bytes";
    bool corrupted = false;
    while (rp < length) {
        size_t meta_length = 0;
        fs_ptr->reader_ptr_->read(&meta_length, sizeof(meta_length));
        rp += sizeof(meta_length);
        fs_ptr->reader_ptr_->seekg(rp);
        if (overruns_file(meta_length)) {
            corrupted = true;
            break;
        }

        // std::string owns the key buffer, so nothing leaks if a later step throws.
        std::string meta(meta_length, '\0');
        fs_ptr->reader_ptr_->read(&meta[0], meta_length);
        rp += meta_length;
        fs_ptr->reader_ptr_->seekg(rp);

        size_t bin_length = 0;
        fs_ptr->reader_ptr_->read(&bin_length, sizeof(bin_length));
        rp += sizeof(bin_length);
        fs_ptr->reader_ptr_->seekg(rp);
        if (overruns_file(bin_length)) {
            corrupted = true;
            break;
        }

        std::shared_ptr<uint8_t[]> binptr(new uint8_t[bin_length]);
        fs_ptr->reader_ptr_->read(binptr.get(), bin_length);
        rp += bin_length;
        fs_ptr->reader_ptr_->seekg(rp);

        load_data_list.Append(meta, binptr, bin_length);
    }
    fs_ptr->reader_ptr_->close();

    if (corrupted) {
        LOG_ENGINE_ERROR_ << "Corrupted vector index file: " << path;
        return nullptr;
    }

    // Throughput is reported for diagnostics only.
    double span = recorder.RecordSection("End");
    double rate = length * 1000000.0 / span / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "read_index(" << path << ") rate " << rate << "MB/s";

    knowhere::VecIndexFactory& vec_index_factory = knowhere::VecIndexFactory::GetInstance();
    auto index =
        vec_index_factory.CreateVecIndex(knowhere::OldIndexTypeToStr(current_type), knowhere::IndexMode::MODE_CPU);
    if (index != nullptr) {
        if (extern_data != nullptr) {
            LOG_ENGINE_DEBUG_ << "load index with " << extern_key << " " << extern_data->size;
            load_data_list.Append(extern_key, extern_data);
            // Include the external payload in the size reported below.
            length += extern_data->size;
        }

        index->Load(load_data_list);
        index->UpdateIndexSize();
        LOG_ENGINE_DEBUG_ << "index file size " << length << " index size " << index->IndexSize();
    } else {
        LOG_ENGINE_ERROR_ << "Fail to create vector index: " << path;
    }

    return index;
}
// Loads the vector index stored at `location` and attaches it to `vector_index`.
//
// externalData selects whether extra data kept outside the index file is
// loaded together with it:
//   ExternalData_None    - index file only
//   ExternalData_RawData - raw vectors read through the default vectors format
//   ExternalData_SQ8     - SQ8 data read through the default compress format
//
// Throws Exception(SERVER_INVALID_ARGUMENT) when the handler's directory does
// not exist. If reading the index fails, a null index is set on `vector_index`.
void
DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
                               ExternalData externalData, segment::VectorIndexPtr& vector_index) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space after the path so the logged directory name is not fused with the text.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    knowhere::VecIndexPtr index = nullptr;
    switch (externalData) {
        case ExternalData_None: {
            index = read_internal(fs_ptr, location);
            break;
        }
        case ExternalData_RawData: {
            auto& default_codec = codec::DefaultCodec::instance();
            knowhere::BinaryPtr raw_data = nullptr;
            default_codec.GetVectorsFormat()->read_vectors(fs_ptr, raw_data);

            index = read_internal(fs_ptr, location, RAW_DATA, raw_data);
            break;
        }
        case ExternalData_SQ8: {
            auto& default_codec = codec::DefaultCodec::instance();
            knowhere::BinaryPtr sq8_data = nullptr;
            default_codec.GetVectorCompressFormat()->read(fs_ptr, location, sq8_data);

            index = read_internal(fs_ptr, location, SQ8_DATA, sq8_data);
            break;
        }
    }

    vector_index->SetVectorIndex(index);
}
// Serializes the index held by `vector_index` and writes it to `location`.
//
// File layout: an int32 index type, followed by one record per serialized
// binary: { size_t key length, key bytes, int64 payload length, payload bytes }.
// If the serialized set contains an SQ8_DATA entry it is removed from the set
// and written separately through the default vector-compress format.
//
// Failures (no index to write, file cannot be opened) are logged and the
// function returns without throwing.
void
DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
                                const segment::VectorIndexPtr& vector_index) {
    milvus::TimeRecorder recorder("write_index");

    knowhere::VecIndexPtr index = vector_index->GetVectorIndex();
    if (index == nullptr) {
        // read() stores a null index when loading fails, so guard against it here
        // instead of dereferencing a null pointer.
        LOG_ENGINE_ERROR_ << "Fail to write vector index, index is null: " << location;
        return;
    }

    auto binaryset = index->Serialize(knowhere::Config());
    int32_t index_type = knowhere::StrToOldIndexType(index->index_type());

    auto sq8_data = binaryset.Erase(SQ8_DATA);
    if (sq8_data != nullptr) {
        auto& default_codec = codec::DefaultCodec::instance();
        default_codec.GetVectorCompressFormat()->write(fs_ptr, location, sq8_data);
    }

    recorder.RecordSection("Start");
    if (!fs_ptr->writer_ptr_->open(location)) {
        LOG_ENGINE_ERROR_ << "Fail to open vector index: " << location;
        return;
    }

    fs_ptr->writer_ptr_->write(&index_type, sizeof(index_type));

    for (const auto& iter : binaryset.binary_map_) {
        const std::string& key = iter.first;
        size_t meta_length = key.length();
        fs_ptr->writer_ptr_->write(&meta_length, sizeof(meta_length));
        // The writer takes a non-const pointer; the key bytes are only read.
        fs_ptr->writer_ptr_->write(const_cast<char*>(key.c_str()), meta_length);

        const auto& binary = iter.second;
        int64_t binary_length = binary->size;
        fs_ptr->writer_ptr_->write(&binary_length, sizeof(binary_length));
        fs_ptr->writer_ptr_->write(binary->data.get(), binary_length);
    }
    fs_ptr->writer_ptr_->close();

    // Throughput is reported for diagnostics only.
    double span = recorder.RecordSection("End");
    double rate = fs_ptr->writer_ptr_->length() * 1000000.0 / span / 1024 / 1024;
    LOG_ENGINE_DEBUG_ << "write_index(" << location << ") rate " << rate << "MB/s";
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "codecs/VectorIndexFormat.h"
namespace milvus {
namespace codec {
// Default implementation of the VectorIndexFormat interface: reads and writes
// serialized vector indexes at a caller-supplied `location`.
class DefaultVectorIndexFormat : public VectorIndexFormat {
public:
DefaultVectorIndexFormat() = default;
// Loads the index stored at `location` and sets it on `vector_index`.
// externalData selects whether raw vectors or SQ8 data kept outside the index
// file are loaded alongside it.
// Throws Exception(SERVER_INVALID_ARGUMENT) if the handler's directory does not exist.
void
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, ExternalData externalData,
segment::VectorIndexPtr& vector_index) override;
// Serializes the index held by `vector_index` and writes it to `location`.
void
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) override;
// No copy and move
DefaultVectorIndexFormat(const DefaultVectorIndexFormat&) = delete;
DefaultVectorIndexFormat(DefaultVectorIndexFormat&&) = delete;
DefaultVectorIndexFormat&
operator=(const DefaultVectorIndexFormat&) = delete;
DefaultVectorIndexFormat&
operator=(DefaultVectorIndexFormat&&) = delete;
private:
// Reads the index file at `path`. When extern_data is non-null it is appended
// to the loaded binary set under extern_key before the index is loaded.
// Returns nullptr when the file cannot be read or the index cannot be created.
knowhere::VecIndexPtr
read_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& path, const std::string& extern_key = "",
const knowhere::BinaryPtr& extern_data = nullptr);
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codecs/default/DefaultVectorsFormat.h"
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <memory>
#include <boost/filesystem.hpp>
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/TimeRecorder.h"
namespace milvus {
namespace codec {
// Reads up to `num` bytes of raw vector data from `file_path`, starting
// `offset` bytes into the payload, into `raw_vectors`.
//
// The file begins with a size_t holding the payload size in bytes; `offset` is
// relative to the first payload byte. The read is clamped to the end of the
// payload. When `offset` is negative or at/after the end of the payload,
// `raw_vectors` is left empty.
//
// Throws Exception(SERVER_CANNOT_OPEN_FILE) if the file cannot be opened.
void
DefaultVectorsFormat::read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                                            off_t offset, size_t num, std::vector<uint8_t>& raw_vectors) {
    if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
        std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_OPEN_FILE, err_msg);
    }

    size_t num_bytes = 0;
    fs_ptr->reader_ptr_->read(&num_bytes, sizeof(size_t));

    // Guard the unsigned subtraction below: an offset past the payload would
    // otherwise wrap around and request an enormous read.
    const bool out_of_range = offset < 0 || static_cast<size_t>(offset) > num_bytes;
    if (out_of_range) {
        LOG_ENGINE_ERROR_ << "Invalid offset " << offset << " for file: " << file_path;
    }
    if (out_of_range || static_cast<size_t>(offset) == num_bytes) {
        fs_ptr->reader_ptr_->close();
        raw_vectors.clear();
        return;
    }

    num = std::min(num, num_bytes - static_cast<size_t>(offset));

    offset += sizeof(size_t);  // Beginning of file is num_bytes
    fs_ptr->reader_ptr_->seekg(offset);

    raw_vectors.resize(num);
    fs_ptr->reader_ptr_->read(raw_vectors.data(), num);

    fs_ptr->reader_ptr_->close();
}
// Reads the complete raw-vector payload of `file_path` into a newly allocated
// knowhere::Binary returned through `raw_vectors`.
//
// The file begins with a size_t holding the payload size in bytes, followed by
// the payload itself.
//
// Throws Exception(SERVER_CANNOT_OPEN_FILE) if the file cannot be opened.
void
DefaultVectorsFormat::read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                                            knowhere::BinaryPtr& raw_vectors) {
    auto& reader = fs_ptr->reader_ptr_;

    const bool opened = reader->open(file_path.c_str());
    if (!opened) {
        std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_OPEN_FILE, err_msg);
    }

    // Header: payload size in bytes.
    size_t payload_size;
    reader->read(&payload_size, sizeof(size_t));

    raw_vectors = std::make_shared<knowhere::Binary>();
    raw_vectors->size = payload_size;
    raw_vectors->data = std::shared_ptr<uint8_t[]>(new uint8_t[payload_size]);

    // The payload starts right after the size header.
    reader->seekg(sizeof(size_t));
    reader->read(raw_vectors->data.get(), payload_size);
    reader->close();
}
// Reads the ids stored in `file_path` into `uids`.
//
// The file begins with a size_t holding the payload size in bytes, followed by
// the ids. Only whole ids are read: if the recorded size is not a multiple of
// sizeof(doc_id_t), the trailing partial id is ignored.
//
// Throws Exception(SERVER_CANNOT_OPEN_FILE) if the file cannot be opened.
void
DefaultVectorsFormat::read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                                         std::vector<segment::doc_id_t>& uids) {
    if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
        std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_OPEN_FILE, err_msg);
    }

    size_t num_bytes = 0;
    fs_ptr->reader_ptr_->read(&num_bytes, sizeof(size_t));

    uids.resize(num_bytes / sizeof(segment::doc_id_t));
    // Read exactly what the buffer can hold. Using num_bytes directly would
    // write past the end of `uids` when it is not a multiple of the id size.
    const size_t readable_bytes = uids.size() * sizeof(segment::doc_id_t);
    fs_ptr->reader_ptr_->read(uids.data(), readable_bytes);

    fs_ptr->reader_ptr_->close();
}
// Scans the handler's directory and loads raw vectors and ids into `vectors_read`.
//
// Every file with raw_vector_extension_ is read into the mutable vector data
// and its stem becomes the vectors' name; every file with user_id_extension_
// is read into the mutable uid list.
//
// Throws Exception(SERVER_INVALID_ARGUMENT) if the directory does not exist and
// Exception(SERVER_CANNOT_OPEN_FILE) if a matching file cannot be opened.
void
DefaultVectorsFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::VectorsPtr& vectors_read) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space after the path so the logged directory name is not fused with the text.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path target_path(dir_path);
    using d_it = boost::filesystem::directory_iterator;
    d_it it_end;
    d_it it(target_path);
    for (; it != it_end; ++it) {
        const auto& path = it->path();
        if (path.extension().string() == raw_vector_extension_) {
            auto& vector_list = vectors_read->GetMutableData();
            // Offset 0 with the largest possible count: read the whole payload.
            read_vectors_internal(fs_ptr, path.string(), 0, INT64_MAX, vector_list);
            vectors_read->SetName(path.stem().string());
        } else if (path.extension().string() == user_id_extension_) {
            auto& uids = vectors_read->GetMutableUids();
            read_uids_internal(fs_ptr, path.string(), uids);
        }
    }
}
// Persists `vectors` as two files in the handler's directory:
//   "<name><raw_vector_extension_>" - size_t byte count followed by the raw vector bytes
//   "<name><user_id_extension_>"    - size_t byte count followed by the ids
//
// Throws Exception(SERVER_CANNOT_CREATE_FILE) if either file cannot be opened.
void
DefaultVectorsFormat::write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorsPtr& vectors) {
    std::string segment_dir = fs_ptr->operation_ptr_->GetDirectory();
    const std::string rv_file_path = segment_dir + "/" + vectors->GetName() + raw_vector_extension_;
    const std::string uid_file_path = segment_dir + "/" + vectors->GetName() + user_id_extension_;

    TimeRecorder timer("write vectors");
    auto& writer = fs_ptr->writer_ptr_;

    // Raw vector file.
    const bool rv_opened = writer->open(rv_file_path.c_str());
    if (!rv_opened) {
        std::string err_msg = "Failed to open file: " + rv_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }

    size_t rv_num_bytes = vectors->GetData().size() * sizeof(uint8_t);
    writer->write(&rv_num_bytes, sizeof(size_t));
    // The writer takes a non-const pointer; the data is only read.
    writer->write(const_cast<uint8_t*>(vectors->GetData().data()), rv_num_bytes);
    writer->close();
    timer.RecordSection("write rv done");

    // Id file.
    const bool uid_opened = writer->open(uid_file_path.c_str());
    if (!uid_opened) {
        std::string err_msg = "Failed to open file: " + uid_file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg);
    }

    size_t uid_num_bytes = vectors->GetUids().size() * sizeof(segment::doc_id_t);
    writer->write(&uid_num_bytes, sizeof(size_t));
    writer->write(const_cast<segment::doc_id_t*>(vectors->GetUids().data()), uid_num_bytes);
    writer->close();
    timer.RecordSection("write uids done");
}
// Reads the ids from the first file in the handler's directory whose extension
// is user_id_extension_. `uids` is left untouched when no such file exists.
//
// Throws Exception(SERVER_INVALID_ARGUMENT) if the directory does not exist and
// Exception(SERVER_CANNOT_OPEN_FILE) if the id file cannot be opened.
void
DefaultVectorsFormat::read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<segment::doc_id_t>& uids) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space after the path so the logged directory name is not fused with the text.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path target_path(dir_path);
    using d_it = boost::filesystem::directory_iterator;
    d_it it_end;
    d_it it(target_path);
    for (; it != it_end; ++it) {
        const auto& path = it->path();
        if (path.extension().string() == user_id_extension_) {
            read_uids_internal(fs_ptr, path.string(), uids);
            break;  // only the first matching file is used
        }
    }
}
// Reads the whole raw-vector payload from the first file in the handler's
// directory whose extension is raw_vector_extension_, returning it through
// `raw_vectors`. `raw_vectors` is left untouched when no such file exists.
//
// Throws Exception(SERVER_INVALID_ARGUMENT) if the directory does not exist and
// Exception(SERVER_CANNOT_OPEN_FILE) if the vector file cannot be opened.
void
DefaultVectorsFormat::read_vectors(const storage::FSHandlerPtr& fs_ptr, knowhere::BinaryPtr& raw_vectors) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space after the path so the logged directory name is not fused with the text.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path target_path(dir_path);
    using d_it = boost::filesystem::directory_iterator;
    d_it it_end;
    d_it it(target_path);
    for (; it != it_end; ++it) {
        const auto& path = it->path();
        if (path.extension().string() == raw_vector_extension_) {
            read_vectors_internal(fs_ptr, path.string(), raw_vectors);
            break;  // only the first matching file is used
        }
    }
}
// Reads up to `num_bytes` bytes of raw vector data, starting `offset` bytes
// into the payload, from the first file in the handler's directory whose
// extension is raw_vector_extension_. `raw_vectors` is left untouched when no
// such file exists.
//
// Throws Exception(SERVER_INVALID_ARGUMENT) if the directory does not exist and
// Exception(SERVER_CANNOT_OPEN_FILE) if the vector file cannot be opened.
void
DefaultVectorsFormat::read_vectors(const storage::FSHandlerPtr& fs_ptr, off_t offset, size_t num_bytes,
                                   std::vector<uint8_t>& raw_vectors) {
    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space after the path so the logged directory name is not fused with the text.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path target_path(dir_path);
    using d_it = boost::filesystem::directory_iterator;
    d_it it_end;
    d_it it(target_path);
    for (; it != it_end; ++it) {
        const auto& path = it->path();
        if (path.extension().string() == raw_vector_extension_) {
            read_vectors_internal(fs_ptr, path.string(), offset, num_bytes, raw_vectors);
            break;  // only the first matching file is used
        }
    }
}
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include <vector>
#include "codecs/VectorsFormat.h"
#include "segment/Vectors.h"
namespace milvus {
namespace codec {
// Default implementation of the VectorsFormat interface. Raw vectors and their
// ids are kept as two files in the directory reported by the FS handler, told
// apart by raw_vector_extension_ and user_id_extension_. Each file starts with
// a size_t byte count followed by the payload.
class DefaultVectorsFormat : public VectorsFormat {
public:
DefaultVectorsFormat() = default;
// Loads raw vectors and ids found in the directory into `vectors_read`.
void
read(const storage::FSHandlerPtr& fs_ptr, segment::VectorsPtr& vectors_read) override;
// Writes the raw vectors and ids of `vectors` to their two files.
void
write(const storage::FSHandlerPtr& fs_ptr, const segment::VectorsPtr& vectors) override;
// Loads the ids from the first id file found in the directory.
void
read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<segment::doc_id_t>& uids) override;
// Loads the whole raw-vector payload from the first raw-vector file found.
void
read_vectors(const storage::FSHandlerPtr& fs_ptr, knowhere::BinaryPtr& raw_vectors) override;
// Loads up to num_bytes bytes, starting at `offset` within the payload, from
// the first raw-vector file found.
void
read_vectors(const storage::FSHandlerPtr& fs_ptr, off_t offset, size_t num_bytes,
std::vector<uint8_t>& raw_vectors) override;
// No copy and move
DefaultVectorsFormat(const DefaultVectorsFormat&) = delete;
DefaultVectorsFormat(DefaultVectorsFormat&&) = delete;
DefaultVectorsFormat&
operator=(const DefaultVectorsFormat&) = delete;
DefaultVectorsFormat&
operator=(DefaultVectorsFormat&&) = delete;
private:
// Per-file helpers used by the public readers above. Each one opens
// `file_path`, reads the size header and then the payload, and throws
// Exception(SERVER_CANNOT_OPEN_FILE) when the file cannot be opened.
void
read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, off_t offset, size_t num,
std::vector<uint8_t>& raw_vectors);
void
read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& raw_vectors);
void
read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
std::vector<segment::doc_id_t>& uids);
private:
// File extensions identifying the raw-vector file and the id file.
const std::string raw_vector_extension_ = ".rv";
const std::string user_id_extension_ = ".uid";
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "codecs/snapshot/SSBlockFormat.h"
#include "codecs/snapshot/SSDeletedDocsFormat.h"
#include "codecs/snapshot/SSIdBloomFilterFormat.h"
#include "codecs/snapshot/SSStructuredIndexFormat.h"
#include "codecs/snapshot/SSVectorCompressFormat.h"
#include "codecs/snapshot/SSVectorIndexFormat.h"
namespace milvus {
namespace codec {
// Groups the snapshot ("SS") format objects behind one access point.
// The constructor is private and instances are reached through instance();
// presumably a single process-wide object -- confirm against the definition.
class SSCodec {
public:
// Returns a reference to the codec object.
static SSCodec&
instance();
// Accessors for the individual format objects; presumably each returns the
// matching *_format_ptr_ member below -- confirm against the definitions.
SSBlockFormatPtr
GetBlockFormat();
SSVectorIndexFormatPtr
GetVectorIndexFormat();
SSStructuredIndexFormatPtr
GetStructuredIndexFormat();
SSDeletedDocsFormatPtr
GetDeletedDocsFormat();
SSIdBloomFilterFormatPtr
GetIdBloomFilterFormat();
SSVectorCompressFormatPtr
GetVectorCompressFormat();
private:
// Private: instances cannot be constructed from outside the class.
SSCodec();
private:
// One format object per kind of persisted data.
SSBlockFormatPtr block_format_ptr_;
SSStructuredIndexFormatPtr structured_index_format_ptr_;
SSVectorIndexFormatPtr vector_index_format_ptr_;
SSDeletedDocsFormatPtr deleted_docs_format_ptr_;
SSIdBloomFilterFormatPtr id_bloom_filter_format_ptr_;
SSVectorCompressFormatPtr vector_compress_format_ptr_;
};
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "segment/DeletedDocs.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Snapshot ("SS") format for deleted docs. Unlike the Default* formats, every
// operation takes an explicit file_path. Only declarations are visible here.
class SSDeletedDocsFormat {
public:
SSDeletedDocsFormat() = default;
// Returns the file postfix used by this format (the value is defined elsewhere).
std::string
FilePostfix();
// Reads deleted docs for file_path; deleted_docs is taken by non-const
// reference, presumably as the output -- confirm against the definition.
void
Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, segment::DeletedDocsPtr& deleted_docs);
// Writes the given deleted docs for file_path.
void
Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
const segment::DeletedDocsPtr& deleted_docs);
// Reports a size through the `size` reference parameter. What exactly is
// counted is not visible from this declaration.
void
ReadSize(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, size_t& size);
// No copy and move
SSDeletedDocsFormat(const SSDeletedDocsFormat&) = delete;
SSDeletedDocsFormat(SSDeletedDocsFormat&&) = delete;
SSDeletedDocsFormat&
operator=(const SSDeletedDocsFormat&) = delete;
SSDeletedDocsFormat&
operator=(SSDeletedDocsFormat&&) = delete;
};
// Shared-ownership handle to an SSDeletedDocsFormat.
using SSDeletedDocsFormatPtr = std::shared_ptr<SSDeletedDocsFormat>;
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "segment/IdBloomFilter.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Codec-layer interface for the id bloom filter of a segment.
// Only declarations live here; the member functions are defined elsewhere.
class SSIdBloomFilterFormat {
 public:
    SSIdBloomFilterFormat() = default;

    // Instances can be neither copied nor moved.
    SSIdBloomFilterFormat(const SSIdBloomFilterFormat&) = delete;
    SSIdBloomFilterFormat(SSIdBloomFilterFormat&&) = delete;

    SSIdBloomFilterFormat&
    operator=(const SSIdBloomFilterFormat&) = delete;

    SSIdBloomFilterFormat&
    operator=(SSIdBloomFilterFormat&&) = delete;

    // Returns the file postfix of this format.
    // NOTE(review): presumably the suffix callers append to a segment path -- confirm in the implementation.
    std::string
    FilePostfix();

    // Reads from file_path through fs_ptr; the filter is handed back via id_bloom_filter_ptr.
    void
    Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
         segment::IdBloomFilterPtr& id_bloom_filter_ptr);

    // Writes id_bloom_filter_ptr to file_path through fs_ptr.
    void
    Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
          const segment::IdBloomFilterPtr& id_bloom_filter_ptr);

    // Hands a filter back via id_bloom_filter_ptr.
    // NOTE(review): presumably builds a new, empty filter bound to file_path -- confirm in the implementation.
    void
    Create(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
           segment::IdBloomFilterPtr& id_bloom_filter_ptr);
};

// Shared-ownership handle to an SSIdBloomFilterFormat.
using SSIdBloomFilterFormatPtr = std::shared_ptr<SSIdBloomFilterFormat>;
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "knowhere/common/BinarySet.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Codec-layer interface for the compressed-vector binary of a segment.
// Only declarations live here; the member functions are defined elsewhere.
class SSVectorCompressFormat {
 public:
    SSVectorCompressFormat() = default;

    // Instances can be neither copied nor moved.
    SSVectorCompressFormat(const SSVectorCompressFormat&) = delete;
    SSVectorCompressFormat(SSVectorCompressFormat&&) = delete;

    SSVectorCompressFormat&
    operator=(const SSVectorCompressFormat&) = delete;

    SSVectorCompressFormat&
    operator=(SSVectorCompressFormat&&) = delete;

    // Returns the file postfix of this format.
    // NOTE(review): presumably the suffix callers append to a segment path -- confirm in the implementation.
    std::string
    FilePostfix();

    // Reads from file_path through fs_ptr; the binary is handed back via the compress out-parameter.
    void
    Read(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinaryPtr& compress);

    // Writes the compress binary to file_path through fs_ptr.
    void
    Write(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const knowhere::BinaryPtr& compress);
};

// Shared-ownership handle to an SSVectorCompressFormat.
using SSVectorCompressFormatPtr = std::shared_ptr<SSVectorCompressFormat>;
} // namespace codec
} // namespace milvus
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "knowhere/index/vector_index/VecIndex.h"
#include "storage/FSHandler.h"
namespace milvus {
namespace codec {
// Codec-layer interface for vector index files of a segment.
// Only declarations live here; the member functions are defined elsewhere.
class SSVectorIndexFormat {
 public:
    SSVectorIndexFormat() = default;

    // Instances can be neither copied nor moved.
    SSVectorIndexFormat(const SSVectorIndexFormat&) = delete;
    SSVectorIndexFormat(SSVectorIndexFormat&&) = delete;

    SSVectorIndexFormat&
    operator=(const SSVectorIndexFormat&) = delete;

    SSVectorIndexFormat&
    operator=(SSVectorIndexFormat&&) = delete;

    // Returns the file postfix of this format.
    // NOTE(review): presumably the suffix callers append to a segment path -- confirm in the implementation.
    std::string
    FilePostfix();

    // ---- readers: each one hands its result back via the non-const `data` parameter ----

    void
    ReadRaw(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinaryPtr& data);

    void
    ReadIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinarySet& data);

    void
    ReadCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, knowhere::BinaryPtr& data);

    // ---- in-memory helpers (no FSHandler involved) ----

    // Converts a raw byte buffer into a knowhere binary returned via `data`.
    void
    ConvertRaw(const std::vector<uint8_t>& raw, knowhere::BinaryPtr& data);

    // Produces `index` for the index type named by index_name.
    // NOTE(review): index_data, raw_data and compress_data are all non-const references, so the header does
    // not show which of them are modified -- confirm in the implementation.
    void
    ConstructIndex(const std::string& index_name, knowhere::BinarySet& index_data, knowhere::BinaryPtr& raw_data,
                   knowhere::BinaryPtr& compress_data, knowhere::VecIndexPtr& index);

    // ---- writers: both take the index to persist as a const reference ----

    void
    WriteIndex(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, const knowhere::VecIndexPtr& index);

    void
    WriteCompress(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                  const knowhere::VecIndexPtr& index);
};

// Shared-ownership handle to an SSVectorIndexFormat.
using SSVectorIndexFormatPtr = std::shared_ptr<SSVectorIndexFormat>;
} // namespace codec
} // namespace milvus
......@@ -28,12 +28,12 @@ using TaskPtr = std::shared_ptr<Task>;
namespace context {
struct HybridSearchContext {
struct SearchContext {
query::GeneralQueryPtr general_query_;
std::vector<::milvus::search::TaskPtr> tasks_;
};
using HybridSearchContextPtr = std::shared_ptr<HybridSearchContext>;
using SearchContextPtr = std::shared_ptr<SearchContext>;
} // namespace context
} // namespace milvus
......@@ -37,8 +37,6 @@ set(storage_files
)
aux_source_directory(${MILVUS_ENGINE_SRC}/index/archive wrapper_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/codecs codecs_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/codecs/default codecs_default_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/codecs/snapshot codecs_snapshot_files)
aux_source_directory(${MILVUS_ENGINE_SRC}/segment segment_files)
......@@ -67,8 +65,6 @@ set(engine_files
${thirdparty_files}
${wrapper_files}
${codecs_files}
${codecs_default_files}
${codecs_snapshot_files}
${segment_files}
)
......
......@@ -11,20 +11,19 @@
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "Options.h"
#include "Types.h"
#include "context/HybridSearchContext.h"
#include "db/Options.h"
#include "db/SimpleWaitNotify.h"
#include "db/SnapshotHandlers.h"
#include "db/insert/MemManager.h"
#include "db/merge/MergeManager.h"
#include "db/snapshot/Context.h"
#include "meta/Meta.h"
#include "query/GeneralQuery.h"
#include "segment/Segment.h"
#include "server/context/Context.h"
#include "db/snapshot/ResourceTypes.h"
#include "db/snapshot/Resources.h"
#include "utils/Status.h"
namespace milvus {
......@@ -48,143 +47,80 @@ class DB {
Stop() = 0;
virtual Status
CreateCollection(meta::CollectionSchema& table_schema_) = 0;
CreateCollection(const snapshot::CreateCollectionContext& context) = 0;
virtual Status
DropCollection(const std::string& collection_id) = 0;
DropCollection(const std::string& name) = 0;
virtual Status
DescribeCollection(meta::CollectionSchema& table_schema_) = 0;
DescribeCollection(const std::string& collection_name, snapshot::CollectionPtr& collection,
snapshot::CollectionMappings& fields_schema) = 0;
virtual Status
HasCollection(const std::string& collection_id, bool& has_or_not) = 0;
virtual Status
HasNativeCollection(const std::string& collection_id, bool& has_or_not) = 0;
HasCollection(const std::string& collection_name, bool& has_or_not) = 0;
virtual Status
AllCollections(std::vector<std::string>& names) = 0;
virtual Status
GetCollectionInfo(const std::string& collection_id, std::string& collection_info) = 0;
virtual Status
GetCollectionRowCount(const std::string& collection_id, uint64_t& row_count) = 0;
virtual Status
PreloadCollection(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
bool force = false) = 0;
virtual Status
ReLoadSegmentsDeletedDocs(const std::string& collection_id, const std::vector<int64_t>& segment_ids) = 0;
GetCollectionInfo(const std::string& collection_name, std::string& collection_info) = 0;
virtual Status
UpdateCollectionFlag(const std::string& collection_id, int64_t flag) = 0;
GetCollectionRowCount(const std::string& collection_name, uint64_t& row_count) = 0;
virtual Status
CreatePartition(const std::string& collection_id, const std::string& partition_name,
const std::string& partition_tag) = 0;
LoadCollection(const server::ContextPtr& context, const std::string& collection_name,
const std::vector<std::string>& field_names, bool force = false) = 0;
virtual Status
HasPartition(const std::string& collection_id, const std::string& tag, bool& has_or_not) = 0;
CreatePartition(const std::string& collection_name, const std::string& partition_name) = 0;
virtual Status
DropPartition(const std::string& partition_name) = 0;
DropPartition(const std::string& collection_name, const std::string& partition_name) = 0;
virtual Status
DropPartitionByTag(const std::string& collection_id, const std::string& partition_tag) = 0;
ShowPartitions(const std::string& collection_name, std::vector<std::string>& partition_names) = 0;
virtual Status
ShowPartitions(const std::string& collection_id, std::vector<meta::CollectionSchema>& partition_schema_array) = 0;
HasPartition(const std::string& collection_name, const std::string& partition_tag, bool& exist) = 0;
virtual Status
InsertVectors(const std::string& collection_id, const std::string& partition_tag, VectorsData& vectors) = 0;
InsertEntities(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk) = 0;
virtual Status
DeleteEntities(const std::string& collection_id, IDNumbers entity_ids) = 0;
DeleteEntities(const std::string& collection_name, engine::IDNumbers entity_ids) = 0;
virtual Status
Flush(const std::string& collection_id) = 0;
Flush(const std::string& collection_name) = 0;
virtual Status
Flush() = 0;
virtual Status
Compact(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
double threshold = 0.0) = 0;
virtual Status
GetVectorsByID(const engine::meta::CollectionSchema& collection, const IDNumbers& id_array,
std::vector<engine::VectorsData>& vectors) = 0;
virtual Status
GetEntitiesByID(const std::string& collection_id, const IDNumbers& id_array,
const std::vector<std::string>& field_names, std::vector<engine::VectorsData>& vectors,
std::vector<engine::AttrsData>& attrs) = 0;
virtual Status
GetVectorIDs(const std::string& collection_id, const std::string& segment_id, IDNumbers& vector_ids) = 0;
// virtual Status
// Merge(const std::set<std::string>& table_ids) = 0;
virtual Status
QueryByIDs(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
const IDNumbers& id_array, ResultIds& result_ids, ResultDistances& result_distances) = 0;
virtual Status
Query(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
VectorsData& vectors, ResultIds& result_ids, ResultDistances& result_distances) = 0;
Compact(const server::ContextPtr& context, const std::string& collection_name, double threshold = 0.0) = 0;
virtual Status
QueryByFileID(const std::shared_ptr<server::Context>& context, const std::vector<std::string>& file_ids, uint64_t k,
const milvus::json& extra_params, VectorsData& vectors, ResultIds& result_ids,
ResultDistances& result_distances) = 0;
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
const std::vector<std::string>& field_names, DataChunkPtr& data_chunk) = 0;
virtual Status
Size(uint64_t& result) = 0;
GetEntityIDs(const std::string& collection_id, int64_t segment_id, IDNumbers& entity_ids) = 0;
virtual Status
CreateIndex(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
CreateIndex(const server::ContextPtr& context, const std::string& collection_id, const std::string& field_name,
const CollectionIndex& index) = 0;
virtual Status
DescribeIndex(const std::string& collection_id, CollectionIndex& index) = 0;
DescribeIndex(const std::string& collection_id, const std::string& field_name, CollectionIndex& index) = 0;
virtual Status
DropIndex(const std::string& collection_id) = 0;
virtual Status
DropAll() = 0;
DropIndex(const std::string& collection_name, const std::string& field_name) = 0;
virtual Status
CreateHybridCollection(meta::CollectionSchema& collection_schema, meta::hybrid::FieldsSchema& fields_schema) = 0;
virtual Status
DescribeHybridCollection(meta::CollectionSchema& collection_schema, meta::hybrid::FieldsSchema& fields_schema) = 0;
virtual Status
InsertEntities(const std::string& collection_id, const std::string& partition_tag,
const std::vector<std::string>& field_names, Entity& entity,
std::unordered_map<std::string, meta::hybrid::DataType>& field_types) = 0;
virtual Status
HybridQuery(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, query::GeneralQueryPtr general_query,
query::QueryPtr query_ptr, std::vector<std::string>& field_name,
std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_type,
engine::QueryResult& result) = 0;
virtual Status
FlushAttrsIndex(const std::string& collection_id) = 0;
DropIndex(const std::string& collection_id) = 0;
virtual Status
CreateStructuredIndex(const std::string& collection_id, const std::vector<std::string>& field_names,
const std::unordered_map<std::string, meta::hybrid::DataType>& attr_types,
const std::unordered_map<std::string, std::vector<uint8_t>>& attr_data,
std::unordered_map<std::string, int64_t>& attr_size,
std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes) = 0;
Query(const server::ContextPtr& context, const query::QueryPtr& query_ptr, engine::QueryResultPtr& result) = 0;
}; // DB
using DBPtr = std::shared_ptr<DB>;
......
......@@ -11,7 +11,6 @@
#include "db/DBFactory.h"
#include "DBImpl.h"
#include "SSDBImpl.h"
#include "meta/MetaFactory.h"
#include "meta/MySQLMetaImpl.h"
#include "meta/SqliteMetaImpl.h"
......@@ -35,14 +34,9 @@ DBFactory::BuildOption() {
}
DBPtr
DBFactory::Build(const DBOptions& options) {
DBFactory::BuildDB(const DBOptions& options) {
return std::make_shared<DBImpl>(options);
}
SSDBPtr
DBFactory::BuildSSDB(const DBOptions& options) {
return std::make_shared<SSDBImpl>(options);
}
} // namespace engine
} // namespace milvus
......@@ -13,7 +13,6 @@
#include "DB.h"
#include "Options.h"
#include "SSDB.h"
#include <memory>
#include <string>
......@@ -27,10 +26,7 @@ class DBFactory {
BuildOption();
static DBPtr
Build(const DBOptions& options);
static SSDBPtr
BuildSSDB(const DBOptions& options);
BuildDB(const DBOptions& options);
};
} // namespace engine
......
此差异已折叠。
......@@ -21,276 +21,142 @@
#include <unordered_map>
#include <vector>
#include "config/ConfigMgr.h"
#include "db/DB.h"
#include "db/IndexFailedChecker.h"
#include "db/SimpleWaitNotify.h"
#include "db/Types.h"
#include "db/insert/MemManager.h"
#include "db/merge/MergeManager.h"
#include "db/meta/FilesHolder.h"
#include "db/snapshot/Context.h"
#include "utils/ThreadPool.h"
#include "wal/WalManager.h"
namespace milvus {
namespace engine {
namespace meta {
class Meta;
}
class DBImpl : public DB, public ConfigObserver {
class DBImpl : public DB {
public:
explicit DBImpl(const DBOptions& options);
~DBImpl();
Status
Start() override;
Status
Stop() override;
Status
DropAll() override;
Start();
Status
CreateCollection(meta::CollectionSchema& collection_schema) override;
Stop();
Status
DropCollection(const std::string& collection_id) override;
CreateCollection(const snapshot::CreateCollectionContext& context) override;
Status
DescribeCollection(meta::CollectionSchema& collection_schema) override;
DropCollection(const std::string& name) override;
Status
HasCollection(const std::string& collection_id, bool& has_or_not) override;
DescribeCollection(const std::string& collection_name, snapshot::CollectionPtr& collection,
snapshot::CollectionMappings& fields_schema) override;
Status
HasNativeCollection(const std::string& collection_id, bool& has_or_not_) override;
HasCollection(const std::string& collection_name, bool& has_or_not) override;
Status
AllCollections(std::vector<std::string>& names) override;
Status
GetCollectionInfo(const std::string& collection_id, std::string& collection_info) override;
Status
PreloadCollection(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
bool force = false) override;
Status
ReLoadSegmentsDeletedDocs(const std::string& collection_id, const std::vector<int64_t>& segment_ids) override;
GetCollectionInfo(const std::string& collection_name, std::string& collection_info);
Status
UpdateCollectionFlag(const std::string& collection_id, int64_t flag) override;
GetCollectionRowCount(const std::string& collection_name, uint64_t& row_count) override;
Status
GetCollectionRowCount(const std::string& collection_id, uint64_t& row_count) override;
LoadCollection(const server::ContextPtr& context, const std::string& collection_name,
const std::vector<std::string>& field_names, bool force = false) override;
Status
CreatePartition(const std::string& collection_id, const std::string& partition_name,
const std::string& partition_tag) override;
CreatePartition(const std::string& collection_name, const std::string& partition_name) override;
Status
HasPartition(const std::string& collection_id, const std::string& tag, bool& has_or_not) override;
DropPartition(const std::string& collection_name, const std::string& partition_name) override;
Status
DropPartition(const std::string& partition_name) override;
ShowPartitions(const std::string& collection_name, std::vector<std::string>& partition_names) override;
Status
DropPartitionByTag(const std::string& collection_id, const std::string& partition_tag) override;
HasPartition(const std::string& collection_name, const std::string& partition_tag, bool& exist) override;
Status
ShowPartitions(const std::string& collection_id,
std::vector<meta::CollectionSchema>& partition_schema_array) override;
InsertEntities(const std::string& collection_name, const std::string& partition_name,
DataChunkPtr& data_chunk) override;
Status
InsertVectors(const std::string& collection_id, const std::string& partition_tag, VectorsData& vectors) override;
DeleteEntities(const std::string& collection_name, engine::IDNumbers entity_ids) override;
Status
DeleteEntities(const std::string& collection_id, IDNumbers entity_ids) override;
Status
Flush(const std::string& collection_id) override;
Flush(const std::string& collection_name) override;
Status
Flush() override;
Status
Compact(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
double threshold = 0.0) override;
Status
GetVectorsByID(const engine::meta::CollectionSchema& collection, const IDNumbers& id_array,
std::vector<engine::VectorsData>& vectors) override;
Status
GetEntitiesByID(const std::string& collection_id, const IDNumbers& id_array,
const std::vector<std::string>& field_names, std::vector<engine::VectorsData>& vectors,
std::vector<engine::AttrsData>& attrs) override;
Status
GetVectorIDs(const std::string& collection_id, const std::string& segment_id, IDNumbers& vector_ids) override;
// Status
// Merge(const std::set<std::string>& collection_ids) override;
Compact(const server::ContextPtr& context, const std::string& collection_name, double threshold = 0.0) override;
Status
CreateIndex(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const CollectionIndex& index) override;
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
const std::vector<std::string>& field_names, DataChunkPtr& data_chunk) override;
Status
CreateStructuredIndex(const std::string& collection_id, const std::vector<std::string>& field_names,
const std::unordered_map<std::string, meta::hybrid::DataType>& attr_types,
const std::unordered_map<std::string, std::vector<uint8_t>>& attr_data,
std::unordered_map<std::string, int64_t>& attr_size,
std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes) override;
GetEntityIDs(const std::string& collection_name, int64_t segment_id, IDNumbers& entity_ids) override;
Status
DescribeIndex(const std::string& collection_id, CollectionIndex& index) override;
CreateIndex(const std::shared_ptr<server::Context>& context, const std::string& collection_name,
const std::string& field_name, const CollectionIndex& index) override;
Status
DropIndex(const std::string& collection_id) override;
DescribeIndex(const std::string& collection_name, const std::string& field_name, CollectionIndex& index) override;
Status
CreateHybridCollection(meta::CollectionSchema& collection_schema,
meta::hybrid::FieldsSchema& fields_schema) override;
DropIndex(const std::string& collection_name, const std::string& field_name) override;
Status
DescribeHybridCollection(meta::CollectionSchema& collection_schema,
meta::hybrid::FieldsSchema& fields_schema) override;
DropIndex(const std::string& collection_name) override;
Status
InsertEntities(const std::string& collection_name, const std::string& partition_tag,
const std::vector<std::string>& field_names, engine::Entity& entity,
std::unordered_map<std::string, meta::hybrid::DataType>& field_types) override;
Status
HybridQuery(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, query::GeneralQueryPtr general_query,
query::QueryPtr query_ptr, std::vector<std::string>& field_names,
std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_type,
engine::QueryResult& result) override;
Status
QueryByIDs(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
const IDNumbers& id_array, ResultIds& result_ids, ResultDistances& result_distances) override;
Status
Query(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const std::vector<std::string>& partition_tags, uint64_t k, const milvus::json& extra_params,
VectorsData& vectors, ResultIds& result_ids, ResultDistances& result_distances) override;
Status
QueryByFileID(const std::shared_ptr<server::Context>& context, const std::vector<std::string>& file_ids, uint64_t k,
const milvus::json& extra_params, VectorsData& vectors, ResultIds& result_ids,
ResultDistances& result_distances) override;
Status
Size(uint64_t& result) override;
Status
FlushAttrsIndex(const std::string& collection_id) override;
public:
void
ConfigUpdate(const std::string& name) override;
Query(const server::ContextPtr& context, const query::QueryPtr& query_ptr, engine::QueryResultPtr& result) override;
private:
Status
QueryAsync(const std::shared_ptr<server::Context>& context, meta::FilesHolder& files_holder, uint64_t k,
const milvus::json& extra_params, VectorsData& vectors, ResultIds& result_ids,
ResultDistances& result_distances);
Status
HybridQueryAsync(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
meta::FilesHolder& files_holder, query::GeneralQueryPtr general_query, query::QueryPtr query_ptr,
std::vector<std::string>& field_names,
std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_type,
engine::QueryResult& result);
Status
GetVectorsByIdHelper(const IDNumbers& id_array, std::vector<engine::VectorsData>& vectors,
meta::FilesHolder& files_holder);
Status
GetEntitiesByIdHelper(const std::string& collection_id, const IDNumbers& id_array,
const std::vector<std::string>& field_names,
std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_type,
std::vector<engine::VectorsData>& vectors, std::vector<engine::AttrsData>& attrs,
meta::FilesHolder& files_holder);
void
InternalFlush(const std::string& collection_name = "");
void
InternalFlush(const std::string& collection_id = "");
TimingFlushThread();
void
BackgroundWalThread();
StartMetricTask();
void
BackgroundFlushThread();
TimingMetricThread();
void
BackgroundMetricThread();
StartBuildIndexTask(const std::vector<std::string>& collection_names);
void
BackgroundIndexThread();
BackgroundBuildIndexTask(std::vector<std::string> collection_names);
void
WaitMergeFileFinish();
TimingIndexThread();
void
WaitBuildIndexFinish();
void
StartMetricTask();
TimingWalThread();
void
StartMergeTask(const std::set<std::string>& merge_collection_ids, bool force_merge_all = false);
Status
ExecWalRecord(const wal::MXLogRecord& record);
void
BackgroundMerge(std::set<std::string> collection_ids, bool force_merge_all);
// Status
// MergeHybridFiles(const std::string& table_id, meta::FilesHolder& files_holder);
StartMergeTask(const std::set<std::string>& collection_names, bool force_merge_all = false);
void
StartBuildIndexTask();
BackgroundMerge(std::set<std::string> collection_names, bool force_merge_all);
void
BackgroundBuildIndex();
Status
CompactFile(const meta::SegmentSchema& file, double threshold, meta::SegmentsSchema& files_to_update);
Status
GetFilesToBuildIndex(const std::string& collection_id, const std::vector<int>& file_types,
meta::FilesHolder& files_holder);
Status
GetPartitionByTag(const std::string& collection_id, const std::string& partition_tag, std::string& partition_name);
Status
GetPartitionsByTags(const std::string& collection_id, const std::vector<std::string>& partition_tags,
std::set<std::string>& partition_name_array);
Status
UpdateCollectionIndexRecursively(const std::string& collection_id, const CollectionIndex& index);
Status
WaitCollectionIndexRecursively(const std::shared_ptr<server::Context>& context, const std::string& collection_id,
const CollectionIndex& index);
Status
DropCollectionIndexRecursively(const std::string& collection_id);
Status
GetCollectionRowCountRecursively(const std::string& collection_id, uint64_t& row_count);
Status
ExecWalRecord(const wal::MXLogRecord& record);
WaitMergeFileFinish();
void
SuspendIfFirst();
......@@ -298,18 +164,10 @@ class DBImpl : public DB, public ConfigObserver {
void
ResumeIfLast();
Status
SerializeStructuredIndex(const meta::SegmentSchema& segment_schema,
const std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes,
const std::unordered_map<std::string, int64_t>& attr_sizes,
const std::unordered_map<std::string, meta::hybrid::DataType>& attr_types);
private:
DBOptions options_;
std::atomic<bool> initialized_;
meta::MetaPtr meta_ptr_;
MemManagerPtr mem_mgr_;
MergeManagerPtr merge_mgr_ptr_;
......@@ -338,13 +196,13 @@ class DBImpl : public DB, public ConfigObserver {
std::mutex build_index_mutex_;
IndexFailedChecker index_failed_checker_;
std::mutex flush_merge_compact_mutex_;
int64_t live_search_num_ = 0;
std::mutex suspend_build_mutex_;
}; // DBImpl
}; // SSDBImpl
using DBImplPtr = std::shared_ptr<DBImpl>;
} // namespace engine
} // namespace milvus
......@@ -15,6 +15,7 @@
#include "utils/Status.h"
#include <cstddef>
#include <mutex>
#include <vector>
namespace milvus {
......
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include <utility>
#include <vector>
#include "db/IndexFailedChecker.h"
namespace milvus {
namespace engine {
// Failure threshold: a file counts as "failed" once the number of error messages recorded for it is
// greater than or equal to this value. With the current value of 1, a single recorded failure is enough.
constexpr uint64_t INDEX_FAILED_RETRY_TIME = 1;
// Drops every failure record kept for `collection_id`, so that index building for the
// collection's previously failed files can be attempted again. Always returns Status::OK().
Status
IndexFailedChecker::CleanFailedIndexFileOfCollection(const std::string& collection_id) {
    std::lock_guard<std::mutex> guard(mutex_);

    // Erasing by key is a no-op when the collection has no records.
    index_failed_files_.erase(collection_id);

    return Status::OK();
}
// Copies one recorded error message for `collection_id` into `err_msg`.
//
// The message chosen is the first message of the first file in the per-collection map's
// iteration order. When the collection has no failure records, `err_msg` is left untouched.
// Always returns Status::OK().
Status
IndexFailedChecker::GetErrMsgForCollection(const std::string& collection_id, std::string& err_msg) {
    std::lock_guard<std::mutex> guard(mutex_);

    auto collection_entry = index_failed_files_.find(collection_id);
    if (collection_entry == index_failed_files_.end()) {
        return Status::OK();
    }

    // No emptiness checks here: MarkFailedIndexFile always stores at least one message per file and
    // MarkSucceedIndexFile removes a collection entry as soon as its file map becomes empty.
    auto first_file = collection_entry->second.begin();
    err_msg = first_file->second[0];

    return Status::OK();
}
// Records one more failed index build for `file`, appending `err_msg` to the list of messages
// kept under (file.collection_id_, file.file_id_). Always returns Status::OK().
Status
IndexFailedChecker::MarkFailedIndexFile(const meta::SegmentSchema& file, const std::string& err_msg) {
    std::lock_guard<std::mutex> guard(mutex_);

    // operator[] default-constructs the per-collection map and the per-file message list on first
    // use, so the "new collection", "new file" and "known file" cases all reduce to a single append.
    index_failed_files_[file.collection_id_][file.file_id_].push_back(err_msg);

    return Status::OK();
}
// Removes any failure record kept for `file`; if that leaves the file's collection without
// records, the collection entry is removed as well. Always returns Status::OK().
Status
IndexFailedChecker::MarkSucceedIndexFile(const meta::SegmentSchema& file) {
    std::lock_guard<std::mutex> guard(mutex_);

    auto collection_entry = index_failed_files_.find(file.collection_id_);
    if (collection_entry == index_failed_files_.end()) {
        return Status::OK();  // no failures currently recorded for this collection
    }

    auto& failed_files = collection_entry->second;
    failed_files.erase(file.file_id_);

    // Never keep an empty per-collection map around.
    if (failed_files.empty()) {
        index_failed_files_.erase(collection_entry);
    }

    return Status::OK();
}
// Tells whether `file` has reached the failure threshold: returns true when at least
// INDEX_FAILED_RETRY_TIME error messages are recorded for it, false otherwise (including
// when nothing is recorded for the file or for its collection).
bool
IndexFailedChecker::IsFailedIndexFile(const meta::SegmentSchema& file) {
    std::lock_guard<std::mutex> guard(mutex_);

    auto collection_entry = index_failed_files_.find(file.collection_id_);
    if (collection_entry == index_failed_files_.end()) {
        return false;
    }

    auto file_entry = collection_entry->second.find(file.file_id_);
    if (file_entry == collection_entry->second.end()) {
        return false;
    }

    return file_entry->second.size() >= INDEX_FAILED_RETRY_TIME;
}
// Filters `table_files` in place: every file that already has at least INDEX_FAILED_RETRY_TIME
// recorded failures is removed from the container; all other files stay, in their original order.
// The files may belong to different collections. Skipping files that keep failing prevents the
// build-index operation from retrying them forever. Always returns Status::OK().
Status
IndexFailedChecker::IgnoreFailedIndexFiles(meta::SegmentsSchema& table_files) {
    std::lock_guard<std::mutex> guard(mutex_);

    // The threshold check is spelled out here rather than delegated to IsFailedIndexFile():
    // that function locks mutex_ itself, and the mutex is already held at this point.
    auto cursor = table_files.begin();
    while (cursor != table_files.end()) {
        bool reached_threshold = false;

        auto collection_entry = index_failed_files_.find(cursor->collection_id_);
        if (collection_entry != index_failed_files_.end()) {
            auto file_entry = collection_entry->second.find(cursor->file_id_);
            if (file_entry != collection_entry->second.end()) {
                reached_threshold = file_entry->second.size() >= INDEX_FAILED_RETRY_TIME;
            }
        }

        if (reached_threshold) {
            cursor = table_files.erase(cursor);  // erase() yields the iterator of the next element
        } else {
            ++cursor;
        }
    }

    return Status::OK();
}
} // namespace engine
} // namespace milvus
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include "db/Types.h"
#include "meta/Meta.h"
#include "utils/Status.h"
#include <map>
#include <mutex>
#include <string>
namespace milvus {
namespace engine {
class IndexFailedChecker {
public:
Status
CleanFailedIndexFileOfCollection(const std::string& collection_id);
Status
GetErrMsgForCollection(const std::string& collection_id, std::string& err_msg);
Status
MarkFailedIndexFile(const meta::SegmentSchema& file, const std::string& err_msg);
Status
MarkSucceedIndexFile(const meta::SegmentSchema& file);
bool
IsFailedIndexFile(const meta::SegmentSchema& file);
Status
IgnoreFailedIndexFiles(meta::SegmentsSchema& table_files);
private:
std::mutex mutex_;
Table2FileErr index_failed_files_; // collection id mapping to (file id mapping to failed times)
};
} // namespace engine
} // namespace milvus
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "db/Options.h"
#include "db/SimpleWaitNotify.h"
#include "db/SnapshotHandlers.h"
#include "db/insert/SSMemManager.h"
#include "db/merge/MergeManager.h"
#include "db/snapshot/Context.h"
#include "db/snapshot/ResourceTypes.h"
#include "db/snapshot/Resources.h"
#include "utils/Status.h"
namespace milvus {
namespace engine {
// Abstract database interface: every operation is a pure virtual returning a Status, and results are
// delivered through reference out-parameters. Instances are non-copyable and are meant to be used
// through a pointer to this base (see SSDBPtr).
class SSDB {
public:
SSDB() = default;
// Copying is disabled.
SSDB(const SSDB&) = delete;
SSDB&
operator=(const SSDB&) = delete;
// Virtual so that an implementation can be destroyed through a pointer to SSDB.
virtual ~SSDB() = default;
// ---- lifecycle ----
virtual Status
Start() = 0;
virtual Status
Stop() = 0;
// ---- collection operations ----
virtual Status
CreateCollection(const snapshot::CreateCollectionContext& context) = 0;
virtual Status
DropCollection(const std::string& name) = 0;
// Outputs: collection and fields_schema.
virtual Status
DescribeCollection(const std::string& collection_name, snapshot::CollectionPtr& collection,
snapshot::CollectionMappings& fields_schema) = 0;
// Output: has_or_not.
virtual Status
HasCollection(const std::string& collection_name, bool& has_or_not) = 0;
// Output: names.
virtual Status
AllCollections(std::vector<std::string>& names) = 0;
// Output: collection_info (NOTE(review): string format is not visible from this header).
virtual Status
GetCollectionInfo(const std::string& collection_name, std::string& collection_info) = 0;
// Output: row_count.
virtual Status
GetCollectionRowCount(const std::string& collection_name, uint64_t& row_count) = 0;
// force defaults to false.
virtual Status
LoadCollection(const server::ContextPtr& context, const std::string& collection_name,
const std::vector<std::string>& field_names, bool force = false) = 0;
// ---- partition operations ----
virtual Status
CreatePartition(const std::string& collection_name, const std::string& partition_name) = 0;
virtual Status
DropPartition(const std::string& collection_name, const std::string& partition_name) = 0;
// Output: partition_names.
virtual Status
ShowPartitions(const std::string& collection_name, std::vector<std::string>& partition_names) = 0;
// Output: exist.
virtual Status
HasPartition(const std::string& collection_name, const std::string& partition_tag, bool& exist) = 0;
// ---- entity operations ----
// data_chunk is taken by non-const reference.
virtual Status
InsertEntities(const std::string& collection_name, const std::string& partition_name, DataChunkPtr& data_chunk) = 0;
// Note: entity_ids is passed by value, so the id list is copied on every call.
virtual Status
DeleteEntities(const std::string& collection_name, engine::IDNumbers entity_ids) = 0;
// Flush overload for a single collection.
virtual Status
Flush(const std::string& collection_name) = 0;
// Flush overload without a collection argument.
virtual Status
Flush() = 0;
// threshold defaults to 0.0.
virtual Status
Compact(const server::ContextPtr& context, const std::string& collection_name, double threshold = 0.0) = 0;
// Output: data_chunk.
virtual Status
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
const std::vector<std::string>& field_names, DataChunkPtr& data_chunk) = 0;
// Output: entity_ids.
virtual Status
GetEntityIDs(const std::string& collection_id, int64_t segment_id, IDNumbers& entity_ids) = 0;
// ---- index operations ----
// NOTE(review): these methods name their first string parameter collection_id while the rest of the
// interface uses collection_name; the implementation class uses collection_name throughout.
virtual Status
CreateIndex(const server::ContextPtr& context, const std::string& collection_id, const std::string& field_name,
const CollectionIndex& index) = 0;
// Output: index.
virtual Status
DescribeIndex(const std::string& collection_id, const std::string& field_name, CollectionIndex& index) = 0;
// DropIndex overload for one field of a collection.
virtual Status
DropIndex(const std::string& collection_name, const std::string& field_name) = 0;
// DropIndex overload taking only the collection.
virtual Status
DropIndex(const std::string& collection_id) = 0;
// ---- query ----
// Output: result.
virtual Status
Query(const server::ContextPtr& context, const query::QueryPtr& query_ptr, engine::QueryResultPtr& result) = 0;
}; // SSDB
using SSDBPtr = std::shared_ptr<SSDB>;
} // namespace engine
} // namespace milvus
此差异已折叠。
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
#include "db/SSDB.h"
#include "utils/ThreadPool.h"
#include "wal/WalManager.h"
namespace milvus {
namespace engine {
class SSDBImpl : public SSDB {
public:
explicit SSDBImpl(const DBOptions& options);
~SSDBImpl();
Status
Start();
Status
Stop();
Status
CreateCollection(const snapshot::CreateCollectionContext& context) override;
Status
DropCollection(const std::string& name) override;
Status
DescribeCollection(const std::string& collection_name, snapshot::CollectionPtr& collection,
snapshot::CollectionMappings& fields_schema) override;
Status
HasCollection(const std::string& collection_name, bool& has_or_not) override;
Status
AllCollections(std::vector<std::string>& names) override;
Status
GetCollectionInfo(const std::string& collection_name, std::string& collection_info);
Status
GetCollectionRowCount(const std::string& collection_name, uint64_t& row_count) override;
Status
LoadCollection(const server::ContextPtr& context, const std::string& collection_name,
const std::vector<std::string>& field_names, bool force = false) override;
Status
CreatePartition(const std::string& collection_name, const std::string& partition_name) override;
Status
DropPartition(const std::string& collection_name, const std::string& partition_name) override;
Status
ShowPartitions(const std::string& collection_name, std::vector<std::string>& partition_names) override;
Status
HasPartition(const std::string& collection_name, const std::string& partition_tag, bool& exist) override;
Status
InsertEntities(const std::string& collection_name, const std::string& partition_name,
DataChunkPtr& data_chunk) override;
Status
DeleteEntities(const std::string& collection_name, engine::IDNumbers entity_ids) override;
Status
Flush(const std::string& collection_name) override;
Status
Flush() override;
Status
Compact(const server::ContextPtr& context, const std::string& collection_name, double threshold = 0.0) override;
Status
GetEntityByID(const std::string& collection_name, const IDNumbers& id_array,
const std::vector<std::string>& field_names, DataChunkPtr& data_chunk) override;
Status
GetEntityIDs(const std::string& collection_name, int64_t segment_id, IDNumbers& entity_ids) override;
Status
CreateIndex(const std::shared_ptr<server::Context>& context, const std::string& collection_name,
const std::string& field_name, const CollectionIndex& index) override;
Status
DescribeIndex(const std::string& collection_name, const std::string& field_name, CollectionIndex& index) override;
Status
DropIndex(const std::string& collection_name, const std::string& field_name) override;
Status
DropIndex(const std::string& collection_name) override;
Status
Query(const server::ContextPtr& context, const query::QueryPtr& query_ptr, engine::QueryResultPtr& result) override;
private:
void
InternalFlush(const std::string& collection_name = "");
void
TimingFlushThread();
void
StartMetricTask();
void
TimingMetricThread();
void
StartBuildIndexTask(const std::vector<std::string>& collection_names);
void
BackgroundBuildIndexTask(std::vector<std::string> collection_names);
void
TimingIndexThread();
void
WaitBuildIndexFinish();
void
TimingWalThread();
Status
ExecWalRecord(const wal::MXLogRecord& record);
void
StartMergeTask(const std::set<std::string>& collection_names, bool force_merge_all = false);
void
BackgroundMerge(std::set<std::string> collection_names, bool force_merge_all);
void
WaitMergeFileFinish();
void
SuspendIfFirst();
void
ResumeIfLast();
private:
DBOptions options_;
std::atomic<bool> initialized_;
SSMemManagerPtr mem_mgr_;
MergeManagerPtr merge_mgr_ptr_;
std::shared_ptr<wal::WalManager> wal_mgr_;
std::thread bg_wal_thread_;
std::thread bg_flush_thread_;
std::thread bg_metric_thread_;
std::thread bg_index_thread_;
SimpleWaitNotify swn_wal_;
SimpleWaitNotify swn_flush_;
SimpleWaitNotify swn_metric_;
SimpleWaitNotify swn_index_;
SimpleWaitNotify flush_req_swn_;
SimpleWaitNotify index_req_swn_;
ThreadPool merge_thread_pool_;
std::mutex merge_result_mutex_;
std::list<std::future<void>> merge_thread_results_;
ThreadPool index_thread_pool_;
std::mutex index_result_mutex_;
std::list<std::future<void>> index_thread_results_;
std::mutex build_index_mutex_;
std::mutex flush_merge_compact_mutex_;
int64_t live_search_num_ = 0;
std::mutex suspend_build_mutex_;
}; // SSDBImpl
using SSDBImplPtr = std::shared_ptr<SSDBImpl>;
} // namespace engine
} // namespace milvus
......@@ -18,7 +18,7 @@
#include "db/snapshot/Resources.h"
#include "db/snapshot/Snapshot.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "segment/SSSegmentReader.h"
#include "segment/SegmentReader.h"
#include <unordered_map>
#include <utility>
......@@ -134,7 +134,7 @@ GetEntityByIdSegmentHandler::Handle(const snapshot::SegmentPtr& segment) {
if (segment_visitor == nullptr) {
return Status(DB_ERROR, "Fail to build segment visitor with id " + std::to_string(segment->GetID()));
}
segment::SSSegmentReader segment_reader(dir_root_, segment_visitor);
segment::SegmentReader segment_reader(dir_root_, segment_visitor);
auto uid_field_visitor = segment_visitor->GetFieldVisitor(DEFAULT_UID_NAME);
......
......@@ -16,7 +16,6 @@
#include "db/snapshot/IterateHandler.h"
#include "db/snapshot/Snapshot.h"
#include "segment/Segment.h"
#include "segment/Types.h"
#include "server/context/Context.h"
#include "utils/Log.h"
......
......@@ -22,15 +22,13 @@
#include <utility>
#include <vector>
#include "db/engine/ExecutionEngine.h"
#include "db/meta/MetaTypes.h"
#include "segment/Types.h"
#include "utils/Json.h"
namespace milvus {
namespace engine {
typedef segment::doc_id_t IDNumber;
typedef int64_t IDNumber;
typedef IDNumber* IDNumberPtr;
typedef std::vector<IDNumber> IDNumbers;
......
......@@ -17,6 +17,7 @@
#include "Options.h"
#include "db/Types.h"
#include "db/meta/MetaTypes.h"
#include "utils/Status.h"
namespace milvus {
namespace engine {
......
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "cache/DataObj.h"
namespace milvus {
namespace Attr {
class Attr : public milvus::cache::DataObj {
public:
Attr() = default;
Attr(std::unordered_map<std::string, std::vector<uint8_t>> attr_data,
std::unordered_map<std::string, int64_t> attr_size, int64_t entity_count)
: attr_data_(std::move(attr_data)), attr_size_(std::move(attr_size)), entity_count_(entity_count) {
}
void
SetAttrData(std::unordered_map<std::string, std::vector<uint8_t>> attr_data) {
attr_data_ = std::move(attr_data);
}
void
SetAttrSize(std::unordered_map<std::string, int64_t> attr_size) {
attr_size_ = std::move(attr_size);
}
void
SetEntityCount(int64_t entity_count) {
entity_count_ = entity_count;
}
std::unordered_map<std::string, std::vector<uint8_t>>
attr_data() {
return attr_data_;
}
std::unordered_map<std::string, int64_t>
attr_size() {
return attr_size_;
}
int64_t
entity_count() {
return entity_count_;
}
int64_t
attr_data_size() {
int64_t attr_data_size = 0;
auto attr_it = attr_size_.begin();
for (; attr_it != attr_size_.end(); attr_it++) {
attr_data_size += attr_it->first.size() + attr_it->second;
}
return attr_data_size;
}
int64_t
Size() override {
return attr_data_size();
}
private:
std::unordered_map<std::string, std::vector<uint8_t>> attr_data_;
std::unordered_map<std::string, int64_t> attr_size_;
int64_t entity_count_;
};
using AttrPtr = std::shared_ptr<Attr>;
} // namespace Attr
} // namespace milvus
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "cache/DataObj.h"
namespace milvus {
namespace Attr {
class AttrIndex : public milvus::cache::DataObj {
public:
AttrIndex() = default;
AttrIndex(std::unordered_map<std::string, knowhere::IndexPtr> index_data,
std::unordered_map<std::string, int64_t> index_size, int64_t entity_count)
: index_data_(std::move(index_data)), index_size_(std::move(index_size)), entity_count_(entity_count) {
}
void
SetIndexData(std::unordered_map<std::string, knowhere::IndexPtr> attr_data) {
index_data_ = std::move(attr_data);
}
void
SetIndexSize(std::unordered_map<std::string, int64_t> attr_size) {
index_size_ = std::move(attr_size);
}
void
SetEntityCount(int64_t entity_count) {
entity_count_ = entity_count;
}
std::unordered_map<std::string, knowhere::IndexPtr>
attr_index_data() {
return index_data_;
}
std::unordered_map<std::string, int64_t>
attr_index_size() {
return index_size_;
}
int64_t
entity_count() {
return entity_count_;
}
int64_t
index_data_size() {
int64_t attr_data_size = 0;
auto attr_it = index_size_.begin();
for (; attr_it != index_size_.end(); attr_it++) {
attr_data_size += attr_it->first.size() + attr_it->second;
}
return attr_data_size;
}
int64_t
Size() override {
return index_data_size();
}
private:
std::unordered_map<std::string, knowhere::IndexPtr> index_data_;
std::unordered_map<std::string, int64_t> index_size_;
int64_t entity_count_;
};
using AttrIndexPtr = std::shared_ptr<AttrIndex>;
} // namespace Attr
} // namespace milvus
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "db/Utils.h"
#include "db/attr/InstanceStructuredIndex.h"
#include "db/meta/FilesHolder.h"
#include "segment/SegmentReader.h"
#include "segment/SegmentWriter.h"
namespace milvus {
namespace Attr {
// Builds and writes a structured index for the attribute fields of every RAW / TO_INDEX segment file
// of a collection.
//
// Steps: fetch the files of those two types, describe the collection to learn its fields, then for each
// file load the segment found in the parent directory of the file location, collect the data of every
// attribute with a non-zero count, generate the indexes (GenStructuredIndex) and write them out
// (SerializeStructuredIndex).
//
// Returns the first failing Status of FilesByType, segment load, index generation or serialization;
// Status::OK() otherwise.
Status
InstanceStructuredIndex::CreateStructuredIndex(const std::string& collection_id, const engine::meta::MetaPtr meta_ptr) {
    using DataType = engine::meta::hybrid::DataType;

    std::vector<int> file_types = {
        milvus::engine::meta::SegmentSchema::RAW,
        milvus::engine::meta::SegmentSchema::TO_INDEX,
    };

    engine::meta::FilesHolder files_holder;
    auto status = meta_ptr->FilesByType(collection_id, file_types, files_holder);
    if (!status.ok()) {
        return status;
    }

    engine::meta::CollectionSchema collection_schema;
    engine::meta::hybrid::FieldsSchema fields_schema;
    collection_schema.collection_id_ = collection_id;
    status = meta_ptr->DescribeHybridCollection(collection_schema, fields_schema);
    if (!status.ok()) {
        // NOTE(review): a failed describe is reported as success, so the caller cannot tell that no
        // index was built. Kept as-is because it may be deliberate - confirm.
        return Status::OK();
    }

    // The field list comes from the collection schema only, so it is computed once rather than once
    // per segment. Every field that is not VECTOR_FLOAT is treated as an attribute.
    std::unordered_map<std::string, DataType> attr_types;
    std::vector<std::string> field_names;
    for (const auto& field_schema : fields_schema.fields_schema_) {
        if (field_schema.field_type_ != static_cast<int32_t>(DataType::VECTOR_FLOAT)) {
            attr_types.insert(
                std::make_pair(field_schema.field_name_, static_cast<DataType>(field_schema.field_type_)));
            field_names.emplace_back(field_schema.field_name_);
        }
    }

    for (auto& segment_schema : files_holder.HoldFiles()) {
        std::string segment_dir;
        engine::utils::GetParentPath(segment_schema.location_, segment_dir);

        auto segment_reader_ptr = std::make_shared<segment::SegmentReader>(segment_dir);
        segment::SegmentPtr segment_ptr;
        segment_reader_ptr->GetSegment(segment_ptr);
        status = segment_reader_ptr->Load();
        if (!status.ok()) {
            return status;
        }

        // Per-segment inputs: raw bytes and element count of every attribute that has data.
        std::unordered_map<std::string, std::vector<uint8_t>> attr_datas;
        std::unordered_map<std::string, int64_t> attr_sizes;

        // Bind by reference: only the entries are read, the map itself need not be copied.
        const auto& attrs = segment_ptr->attrs_ptr_->attrs;
        for (const auto& attr_entry : attrs) {
            if (attr_entry.second->GetCount() != 0) {
                attr_datas.insert(std::make_pair(attr_entry.first, attr_entry.second->GetMutableData()));
                attr_sizes.insert(std::make_pair(attr_entry.first, attr_entry.second->GetCount()));
            }
        }

        std::unordered_map<std::string, knowhere::IndexPtr> attr_indexes;
        status = GenStructuredIndex(collection_id, field_names, attr_types, attr_datas, attr_sizes, attr_indexes);
        if (!status.ok()) {
            return status;
        }

        status = SerializeStructuredIndex(segment_schema, attr_indexes, attr_sizes, attr_types);
        if (!status.ok()) {
            return status;
        }
    }

    return Status::OK();
}
// Builds a knowhere::StructuredIndexSort<T> over the values of one attribute field and stores it in
// attr_indexes under field_name.
//
// raw_data  - the field's values as raw bytes.
// attr_size - in: number of values of type T; out (on success): the same amount expressed in bytes.
//
// Returns an error Status when the count is negative or raw_data holds fewer than attr_size values.
template <typename T>
static Status
BuildSortedAttrIndex(const std::string& field_name, const std::vector<uint8_t>& raw_data, int64_t& attr_size,
                     std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes) {
    if (attr_size < 0) {
        return Status{SERVER_UNEXPECTED_ERROR, "invalid row count for attribute field " + field_name};
    }

    const auto row_count = static_cast<size_t>(attr_size);
    // Compare in units of elements so the check itself cannot overflow.
    if (raw_data.size() / sizeof(T) < row_count) {
        return Status{SERVER_UNEXPECTED_ERROR,
                      "attribute data of field " + field_name + " is shorter than its row count"};
    }

    // Copy into a typed buffer so the index constructor reads properly aligned values. The copy length
    // is in bytes (row_count * sizeof(T)), not in elements.
    const size_t byte_count = row_count * sizeof(T);
    std::vector<T> values(row_count);
    if (byte_count != 0) {
        memcpy(values.data(), raw_data.data(), byte_count);
    }

    auto sorted_index_ptr = std::make_shared<knowhere::StructuredIndexSort<T>>(row_count, values.data());
    knowhere::IndexPtr index_ptr = std::static_pointer_cast<knowhere::Index>(sorted_index_ptr);
    attr_indexes.insert(std::make_pair(field_name, index_ptr));

    attr_size = static_cast<int64_t>(byte_count);
    return Status::OK();
}

// Generates one sorted structured index per requested field.
//
// collection_id - not used by this function.
// field_names   - fields to index.
// attr_types    - data type of each field; INT8/16/32/64, FLOAT and DOUBLE are indexed, any other type
//                 is ignored.
// attr_datas    - raw value bytes of each field.
// attr_sizes    - in: number of values of each field; out: rewritten to the size in bytes for every
//                 field that was indexed.
// attr_indexes  - out: receives the generated index of each indexed field.
//
// Returns an error Status when attr_sizes or attr_datas is empty, when a requested field has no entry
// in attr_types, or when a field's data is inconsistent with its count. A requested field that has no
// entry in attr_sizes / attr_datas (no data to index) is skipped.
Status
InstanceStructuredIndex::GenStructuredIndex(
    const std::string& collection_id, const std::vector<std::string>& field_names,
    const std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_types,
    const std::unordered_map<std::string, std::vector<uint8_t>>& attr_datas,
    std::unordered_map<std::string, int64_t>& attr_sizes,
    std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes) {
    using DataType = engine::meta::hybrid::DataType;

    if (attr_sizes.empty() || attr_datas.empty()) {
        return Status{SERVER_UNEXPECTED_ERROR, "attributes data is null when generate structured index"};
    }

    for (const auto& field_name : field_names) {
        const auto type_it = attr_types.find(field_name);
        if (type_it == attr_types.end()) {
            // Report a Status instead of letting std::out_of_range escape a Status-returning API.
            return Status{SERVER_UNEXPECTED_ERROR, "unknown data type for attribute field " + field_name};
        }

        const auto data_it = attr_datas.find(field_name);
        auto size_it = attr_sizes.find(field_name);
        if (data_it == attr_datas.end() || size_it == attr_sizes.end()) {
            // No data was supplied for this field, so there is nothing to index.
            continue;
        }

        Status status = Status::OK();
        switch (type_it->second) {
            case DataType::INT8:
                status = BuildSortedAttrIndex<int8_t>(field_name, data_it->second, size_it->second, attr_indexes);
                break;
            case DataType::INT16:
                status = BuildSortedAttrIndex<int16_t>(field_name, data_it->second, size_it->second, attr_indexes);
                break;
            case DataType::INT32:
                status = BuildSortedAttrIndex<int32_t>(field_name, data_it->second, size_it->second, attr_indexes);
                break;
            case DataType::INT64:
                status = BuildSortedAttrIndex<int64_t>(field_name, data_it->second, size_it->second, attr_indexes);
                break;
            case DataType::FLOAT:
                status = BuildSortedAttrIndex<float>(field_name, data_it->second, size_it->second, attr_indexes);
                break;
            case DataType::DOUBLE:
                status = BuildSortedAttrIndex<double>(field_name, data_it->second, size_it->second, attr_indexes);
                break;
            default:
                // Other types are not indexed.
                break;
        }
        if (!status.ok()) {
            return status;
        }
    }

    return Status::OK();
}
// Writes the attribute indexes of one segment file.
//
// A SegmentWriter is created on the parent directory of segment_schema.location_, handed the indexes
// together with their sizes and types, and then asked to write them. The Status of the first failing
// step is returned; otherwise the Status of the write.
Status
InstanceStructuredIndex::SerializeStructuredIndex(
    const engine::meta::SegmentSchema& segment_schema,
    const std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes,
    const std::unordered_map<std::string, int64_t>& attr_sizes,
    const std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_types) {
    std::string parent_dir;
    engine::utils::GetParentPath(segment_schema.location_, parent_dir);

    auto writer_ptr = std::make_shared<segment::SegmentWriter>(parent_dir);

    Status result = writer_ptr->SetAttrsIndex(attr_indexes, attr_sizes, attr_types);
    if (result.ok()) {
        result = writer_ptr->WriteAttrsIndex();
    }
    return result;
}
} // namespace Attr
} // namespace milvus
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <src/db/meta/Meta.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "db/meta/MetaTypes.h"
#include "knowhere/index/structured_index/StructuredIndexSort.h"
#include "utils/Status.h"
namespace milvus {
namespace Attr {
// Stateless helper (static methods only) that builds sorted structured indexes over the attribute
// fields of a collection's segments and writes them out.
class InstanceStructuredIndex {
public:
// Entry point: for every RAW / TO_INDEX segment file of the collection, loads the segment, generates
// the attribute indexes and serializes them. meta_ptr is used to look up the files and the fields.
static Status
CreateStructuredIndex(const std::string& collection_id, const engine::meta::MetaPtr meta_ptr);
// Builds one knowhere::StructuredIndexSort per field of field_names, chosen by the field's entry in
// attr_types, from the raw bytes in attr_datas. Results are stored in attr_indexes.
// attr_sizes is an in/out parameter: the entry of each indexed field is multiplied by the size of the
// field's element type.
static Status
GenStructuredIndex(const std::string& collection_id, const std::vector<std::string>& field_names,
const std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_types,
const std::unordered_map<std::string, std::vector<uint8_t>>& attr_datas,
std::unordered_map<std::string, int64_t>& attr_sizes,
std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes);
// Hands the indexes, sizes and types to a segment writer created on the parent directory of the
// segment file location and writes them.
static Status
SerializeStructuredIndex(const engine::meta::SegmentSchema& segment_schema,
const std::unordered_map<std::string, knowhere::IndexPtr>& attr_indexes,
const std::unordered_map<std::string, int64_t>& attr_sizes,
const std::unordered_map<std::string, engine::meta::hybrid::DataType>& attr_types);
};
} // namespace Attr
} // namespace milvus
......@@ -11,7 +11,6 @@
#include "db/engine/EngineFactory.h"
#include "db/engine/ExecutionEngineImpl.h"
#include "db/engine/SSExecutionEngineImpl.h"
#include "db/snapshot/Snapshots.h"
#include "utils/Log.h"
......@@ -20,29 +19,13 @@
namespace milvus {
namespace engine {
ExecutionEnginePtr
EngineFactory::Build(uint16_t dimension, const std::string& location, EngineType index_type, MetricType metric_type,
const milvus::json& index_params) {
if (index_type == EngineType::INVALID) {
LOG_ENGINE_ERROR_ << "Unsupported engine type";
return nullptr;
}
LOG_ENGINE_DEBUG_ << "EngineFactory index type: " << (int)index_type;
ExecutionEnginePtr execution_engine_ptr =
std::make_shared<ExecutionEngineImpl>(dimension, location, index_type, metric_type, index_params);
execution_engine_ptr->Init();
return execution_engine_ptr;
}
SSExecutionEnginePtr
EngineFactory::Build(const std::string& dir_root, const std::string& collection_name, int64_t segment_id) {
snapshot::ScopedSnapshotT ss;
snapshot::Snapshots::GetInstance().GetSnapshot(ss, collection_name);
auto seg_visitor = engine::SegmentVisitor::Build(ss, segment_id);
SSExecutionEnginePtr execution_engine_ptr = std::make_shared<SSExecutionEngineImpl>(dir_root, seg_visitor);
SSExecutionEnginePtr execution_engine_ptr = std::make_shared<ExecutionEngineImpl>(dir_root, seg_visitor);
return execution_engine_ptr;
}
......
......@@ -12,7 +12,6 @@
#pragma once
#include "ExecutionEngine.h"
#include "SSExecutionEngine.h"
#include "utils/Json.h"
#include "utils/Status.h"
......@@ -23,10 +22,6 @@ namespace engine {
class EngineFactory {
public:
static ExecutionEnginePtr
Build(uint16_t dimension, const std::string& location, EngineType index_type, MetricType metric_type,
const milvus::json& index_params);
static SSExecutionEnginePtr
Build(const std::string& dir_root, const std::string& collection_name, int64_t segment_id);
};
......
......@@ -17,109 +17,35 @@
#include <unordered_map>
#include <vector>
#include <faiss/utils/ConcurrentBitset.h>
#include "db/Types.h"
#include "db/meta/MetaTypes.h"
#include "query/GeneralQuery.h"
#include "utils/Json.h"
#include "utils/Status.h"
namespace milvus {
namespace scheduler {
class SearchJob;
using SearchJobPtr = std::shared_ptr<SearchJob>;
} // namespace scheduler
namespace engine {
struct ExecutionEngineContext {
query::QueryPtr query_ptr_;
QueryResultPtr query_result_;
};
class ExecutionEngine {
public:
virtual Status
AddWithIds(int64_t n, const float* xdata, const int64_t* xids) = 0;
virtual Status
AddWithIds(int64_t n, const uint8_t* xdata, const int64_t* xids) = 0;
virtual size_t
Count() const = 0;
virtual size_t
Dimension() const = 0;
virtual size_t
Size() const = 0;
virtual Status
Serialize() = 0;
virtual Status
Load(bool to_cache = true) = 0;
virtual Status
LoadAttr(bool to_cache = true) = 0;
virtual Status
CopyToGpu(uint64_t device_id, bool hybrid) = 0;
Load(ExecutionEngineContext& context) = 0;
virtual Status
CopyToIndexFileToGpu(uint64_t device_id) = 0;
CopyToGpu(uint64_t device_id) = 0;
virtual Status
CopyToCpu() = 0;
// virtual std::shared_ptr<ExecutionEngine>
// Clone() = 0;
// virtual Status
// Merge(const std::string& location) = 0;
Search(ExecutionEngineContext& context) = 0;
#if 0
virtual Status
GetVectorByID(const int64_t id, float* vector, bool hybrid) = 0;
virtual Status
GetVectorByID(const int64_t id, uint8_t* vector, bool hybrid) = 0;
#endif
virtual Status
ExecBinaryQuery(query::GeneralQueryPtr general_query, faiss::ConcurrentBitsetPtr& bitset,
std::unordered_map<std::string, meta::hybrid::DataType>& attr_type,
std::string& vector_placeholder) = 0;
virtual Status
HybridSearch(scheduler::SearchJobPtr job, std::unordered_map<std::string, meta::hybrid::DataType>& attr_type,
std::vector<float>& distances, std::vector<int64_t>& search_ids, bool hybrid) = 0;
virtual Status
Search(std::vector<int64_t>& ids, std::vector<float>& distances, scheduler::SearchJobPtr job, bool hybrid) = 0;
virtual std::shared_ptr<ExecutionEngine>
BuildIndex(const std::string& location, EngineType engine_type) = 0;
virtual Status
Cache() = 0;
virtual Status
AttrCache() = 0;
virtual Status
Init() = 0;
virtual EngineType
IndexEngineType() const = 0;
virtual MetricType
IndexMetricType() const = 0;
virtual std::string
GetLocation() const = 0;
virtual std::string
GetAttrLocation() const = 0;
BuildIndex() = 0;
};
using ExecutionEnginePtr = std::shared_ptr<ExecutionEngine>;
using SSExecutionEnginePtr = std::shared_ptr<ExecutionEngine>;
} // namespace engine
} // namespace milvus
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "db/Types.h"
#include "db/meta/MetaTypes.h"
#include "query/GeneralQuery.h"
#include "utils/Status.h"
namespace milvus {
namespace engine {
// Argument bundle passed by reference to the engine's Load() and Search() calls.
struct ExecutionEngineContext {
// The query to execute.
query::QueryPtr query_ptr_;
// NOTE(review): presumably filled in by Search() with the query result - confirm in the implementation.
QueryResultPtr query_result_;
};
// Abstract execution-engine interface; implementations are handled through SSExecutionEnginePtr
// (a shared_ptr to this base).
class SSExecutionEngine {
 public:
    // Virtual so that an implementation is destroyed correctly through a pointer to this base.
    virtual ~SSExecutionEngine() = default;

    // Takes the execution context by reference (it carries the query and the result slot).
    virtual Status
    Load(ExecutionEngineContext& context) = 0;

    virtual Status
    CopyToGpu(uint64_t device_id) = 0;

    // Takes the same context type as Load().
    virtual Status
    Search(ExecutionEngineContext& context) = 0;

    virtual Status
    BuildIndex() = 0;
};
using SSExecutionEnginePtr = std::shared_ptr<SSExecutionEngine>;
} // namespace engine
} // namespace milvus
此差异已折叠。
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "SSExecutionEngine.h"
#include "db/SnapshotVisitor.h"
#include "segment/SSSegmentReader.h"
namespace milvus {
namespace engine {
// Concrete SSExecutionEngine, constructed from a root directory path and a
// segment visitor.
class SSExecutionEngineImpl : public SSExecutionEngine {
 public:
    SSExecutionEngineImpl(const std::string& dir_root, const SegmentVisitorPtr& segment_visitor);

    // ---- SSExecutionEngine interface ----
    Status
    Load(ExecutionEngineContext& context) override;

    Status
    CopyToGpu(uint64_t device_id) override;

    Status
    Search(ExecutionEngineContext& context) override;

    Status
    BuildIndex() override;

 private:
    // ---- internal loading helpers ----
    Status
    Load(const std::vector<std::string>& field_names);

    Status
    LoadForSearch(const query::QueryPtr& query_ptr);

    Status
    LoadForIndex();

    // NOTE(review): the identifier is spelled "Createt..."; it is kept as-is
    // because the out-of-line definition (not visible here) must match.
    knowhere::VecIndexPtr
    CreatetVecIndex(const std::string& index_name);

    // ---- state (declaration order preserved: it fixes initialization order) ----
    std::string root_path_;  // NOTE(review): presumably holds `dir_root` - confirm in the constructor
    SegmentVisitorPtr segment_visitor_;
    segment::SSSegmentReaderPtr segment_reader_;

    int64_t gpu_num_{0};
    bool gpu_enable_{false};
};
} // namespace engine
} // namespace milvus
此差异已折叠。
......@@ -11,7 +11,6 @@
#include "db/insert/MemManagerFactory.h"
#include "MemManagerImpl.h"
#include "SSMemManagerImpl.h"
#include "utils/Exception.h"
#include "utils/Log.h"
......@@ -27,13 +26,8 @@ namespace milvus {
namespace engine {
MemManagerPtr
MemManagerFactory::Build(const std::shared_ptr<meta::Meta>& meta, const DBOptions& options) {
return std::make_shared<MemManagerImpl>(meta, options);
}
SSMemManagerPtr
MemManagerFactory::SSBuild(const DBOptions& options) {
return std::make_shared<SSMemManagerImpl>(options);
MemManagerFactory::Build(const DBOptions& options) {
return std::make_shared<MemManagerImpl>(options);
}
} // namespace engine
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -21,9 +21,6 @@ namespace engine {
class MergeManagerFactory {
public:
static MergeManagerPtr
Build(const meta::MetaPtr& meta_ptr, const DBOptions& options);
static MergeManagerPtr
SSBuild(const DBOptions& options);
};
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册