Commit 8e12445e authored by Yu Kun

add GpuCache Metrics


Former-commit-id: 9da6fcb11f69302fb1c00bb16fe4c6f7b7699ee5
Parent 7f9def46
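
In short, the commit replaces the stub GpuCacheUsageGaugeSet(double) with a parameterless version that reads the configured GPU id list, queries each GpuCacheMgr instance, and exports cache usage as a percentage gauge. A condensed sketch of the new method, stitched together from the hunks below (it relies on the surrounding Milvus classes; unlike the committed code it labels each gauge with the GPU id rather than the loop index):

void PrometheusMetrics::GpuCacheUsageGaugeSet() {
    if (!startup_) return;

    // Read the configured GPU id list, e.g. "0,1", and split it on commas.
    server::ConfigNode& config =
        server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
    std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1");

    std::vector<uint64_t> gpu_ids;
    std::stringstream ss(gpu_ids_str);
    for (int id; ss >> id;) {
        gpu_ids.push_back(id);
        if (ss.peek() == ',') ss.ignore();
    }

    // Export each GPU's cache usage as a percentage of its capacity.
    for (size_t i = 0; i < gpu_ids.size(); ++i) {
        uint64_t usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage();
        uint64_t capacity = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheCapacity();
        prometheus::Gauge& gpu_cache =
            gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(gpu_ids[i])}});
        gpu_cache.Set(usage * 100 / capacity);
    }
}
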
......@@ -312,6 +312,7 @@ void DBImpl::StartMetricTask() {
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total);
server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
uint64_t size;
Size(size);
server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
......
......@@ -576,7 +576,7 @@ const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE(
"rpc.Status\"\000\022B\n\rDescribeIndex\022\026.milvus.g"
"rpc.TableName\032\027.milvus.grpc.IndexParam\"\000"
"\022:\n\tDropIndex\022\026.milvus.grpc.TableName\032\023."
"milvus.grpc.Status\"\000b\006proto3"
"milvus.grpc.Status\"\000B\002H\001b\006proto3"
;
static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_milvus_2eproto_deps[1] = {
&::descriptor_table_status_2eproto,
......@@ -603,7 +603,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_mil
static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_milvus_2eproto_once;
static bool descriptor_table_milvus_2eproto_initialized = false;
const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_milvus_2eproto = {
&descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2388,
&descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2392,
&descriptor_table_milvus_2eproto_once, descriptor_table_milvus_2eproto_sccs, descriptor_table_milvus_2eproto_deps, 17, 1,
schemas, file_default_instances, TableStruct_milvus_2eproto::offsets,
file_level_metadata_milvus_2eproto, 17, file_level_enum_descriptors_milvus_2eproto, file_level_service_descriptors_milvus_2eproto,
......
syntax = "proto3";
option optimize_for = SPEED;
import "status.proto";
......
......@@ -32,7 +32,7 @@ class MetricsBase{
virtual void BuildIndexDurationSecondsHistogramObserve(double value) {};
virtual void CpuCacheUsageGaugeSet(double value) {};
virtual void GpuCacheUsageGaugeSet(double value) {};
virtual void GpuCacheUsageGaugeSet() {};
virtual void MetaAccessTotalIncrement(double value = 1) {};
virtual void MetaAccessDurationSecondsHistogramObserve(double value) {};
......
......@@ -167,15 +167,26 @@ void PrometheusMetrics::CPUTemperature() {
}
}
void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) {
void PrometheusMetrics::GpuCacheUsageGaugeSet() {
if(!startup_) return;
int64_t num_processors = server::SystemInfo::GetInstance().num_processor();
server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1");
for (auto i = 0; i < num_processors; ++i) {
// int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage();
// int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity();
// prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}});
// gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total);
std::vector<uint64_t > gpu_ids;
std::stringstream ss(gpu_ids_str);
for (int i; ss >> i;) {
gpu_ids.push_back(i);
if (ss.peek() == ',') {
ss.ignore();
}
}
for(auto i = 0; i < gpu_ids.size(); ++i) {
uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage();
uint64_t cache_capacity = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheCapacity();
prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}});
gpu_cache.Set(cache_usage * 100 / cache_capacity);
}
}
......
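
For reference, the comma-separated id parsing used above can be exercised on its own; a minimal, hypothetical driver (not part of the commit) that prints each parsed id:

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::string gpu_ids_str = "0,1,3";      // e.g. the value read from CONFIG_GPU_IDS
    std::vector<uint64_t> gpu_ids;

    std::stringstream ss(gpu_ids_str);
    for (int id; ss >> id;) {               // read one integer at a time
        gpu_ids.push_back(id);
        if (ss.peek() == ',') ss.ignore();  // skip the separator
    }

    for (auto id : gpu_ids) {
        std::cout << id << std::endl;       // prints 0, 1, 3 on separate lines
    }
    return 0;
}
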
......@@ -55,7 +55,7 @@ class PrometheusMetrics: public MetricsBase {
void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);};
void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);};
void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);};
void GpuCacheUsageGaugeSet(double value) override;
void GpuCacheUsageGaugeSet() override;
void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);};
void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);};
......@@ -343,7 +343,7 @@ class PrometheusMetrics: public MetricsBase {
.Help("current cache usage by bytes")
.Register(*registry_);
prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({});
    // record GPU cache usage as a percentage of capacity
prometheus::Family<prometheus::Gauge> &gpu_cache_usage_ = prometheus::BuildGauge()
.Name("gpu_cache_usage_bytes")
......
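
The header keeps one prometheus::Family<prometheus::Gauge> per metric and adds a labelled Gauge per time series. The pattern, isolated from the class (assuming the same prometheus-cpp headers the file already includes; registry stands in for the class's registry_ member):

// One family is registered once per metric name...
prometheus::Family<prometheus::Gauge>& gpu_cache_usage =
    prometheus::BuildGauge()
        .Name("gpu_cache_usage_bytes")
        .Help("current cache usage by bytes")
        .Register(*registry);

// ...and Add() returns one Gauge per distinct label set, so calling it
// again with the same labels updates the same time series.
prometheus::Gauge& gpu0_gauge = gpu_cache_usage.Add({{"GPU_Cache", "0"}});
gpu0_gauge.Set(42.0);   // e.g. usage * 100 / capacity for GPU 0
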
......@@ -60,7 +60,7 @@ ResourceMgr::Connect(const std::string &name1, const std::string &name2, Connect
void
ResourceMgr::Connect(ResourceWPtr &res1, ResourceWPtr &res2, Connection &connection) {
if (auto observe_a = res1.lock()) {
    if (auto observe_a = res1.lock()) {
if (auto observe_b = res2.lock()) {
observe_a->AddNeighbour(std::static_pointer_cast<Node>(observe_b), connection);
observe_b->AddNeighbour(std::static_pointer_cast<Node>(observe_a), connection);
......
......@@ -184,6 +184,7 @@ ClientProxy::Insert(const std::string &table_name,
for (auto &record : record_array) {
::milvus::grpc::RowRecord *grpc_record = insert_param.add_row_record_array();
// memcpy(grpc_record->vector_data().data(), record.data.data(), record.data.size() * sizeof(float));
for (size_t i = 0; i < record.data.size(); i++) {
grpc_record->add_vector_data(record.data[i]);
}
......
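
The hunk above replaces a commented-out memcpy with an element-wise copy: vector_data() exposes the repeated field read-only, so per-element add_vector_data() is the straightforward way to fill it. A small variation, assuming record.data is a std::vector<float> (as the sizeof(float) in the old comment suggests) and using the standard protobuf RepeatedField Reserve() call:

// Reserve the repeated float field up front to avoid reallocations,
// then copy element by element (grpc_record and record are the
// objects from the loop above).
grpc_record->mutable_vector_data()->Reserve(static_cast<int>(record.data.size()));
for (float v : record.data) {
    grpc_record->add_vector_data(v);
}
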