提交 fb72d917 编写于 作者: zlw5307's avatar zlw5307 提交者: neverchanje

refactor: refactor the code about prometheus in pegasus_counter_reporter (#463)

上级 944093b8
......@@ -670,10 +670,11 @@ function run_start_onebox()
for i in $(seq ${META_COUNT})
do
meta_port=$((34600+i))
prometheus_port=$((9091+i))
mkdir -p meta$i;
cd meta$i
ln -s -f ${SERVER_PATH}/pegasus_server pegasus_server
sed "s/@META_PORT@/$meta_port/;s/@REPLICA_PORT@/34800/" ${ROOT}/config-server.ini >config.ini
sed "s/@META_PORT@/$meta_port/;s/@REPLICA_PORT@/34800/;s/@PROMETHEUS_PORT@/$prometheus_port/" ${ROOT}/config-server.ini >config.ini
echo "cd `pwd` && $PWD/pegasus_server config.ini -app_list meta &>result &"
$PWD/pegasus_server config.ini -app_list meta &>result &
PID=$!
......@@ -682,11 +683,12 @@ function run_start_onebox()
done
for j in $(seq ${REPLICA_COUNT})
do
prometheus_port=$((9091+${META_COUNT}+j))
replica_port=$((34800+j))
mkdir -p replica$j
cd replica$j
ln -s -f ${SERVER_PATH}/pegasus_server pegasus_server
sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/$replica_port/" ${ROOT}/config-server.ini >config.ini
sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/$replica_port/;s/@PROMETHEUS_PORT@/$prometheus_port/" ${ROOT}/config-server.ini >config.ini
echo "cd `pwd` && $PWD/pegasus_server config.ini -app_list replica &>result &"
$PWD/pegasus_server config.ini -app_list replica &>result &
PID=$!
......@@ -698,7 +700,7 @@ function run_start_onebox()
mkdir -p collector
cd collector
ln -s -f ${SERVER_PATH}/pegasus_server pegasus_server
sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/34800/" ${ROOT}/config-server.ini >config.ini
sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/34800/;s/@PROMETHEUS_PORT@/9091/" ${ROOT}/config-server.ini >config.ini
echo "cd `pwd` && $PWD/pegasus_server config.ini -app_list collector &>result &"
$PWD/pegasus_server config.ini -app_list collector &>result &
PID=$!
......
......@@ -16,12 +16,12 @@ find_package(prometheus-cpp)#TODO(huangwei5): make it optional
# the INTERFACE_LINK_LIBRARIES of prometheus contains the absolute path of libcurl
# when we use the compiled prometheus-cpp libs, the path of libcurl should be our own path
find_package(CURL)
get_target_property(_libs prometheus-cpp::push INTERFACE_LINK_LIBRARIES)
get_target_property(_libs prometheus-cpp::pull INTERFACE_LINK_LIBRARIES)
string(REGEX REPLACE ";/.*libcurl\.a" ";${CURL_LIBRARIES}" _libs "${_libs}")
set_target_properties(prometheus-cpp::push PROPERTIES INTERFACE_LINK_LIBRARIES "${_libs}")
set_target_properties(prometheus-cpp::pull PROPERTIES INTERFACE_LINK_LIBRARIES "${_libs}")
dsn_add_static_library()
target_link_libraries(${MY_PROJ_NAME} PUBLIC pegasus_base
prometheus-cpp::push
prometheus-cpp::pull
) # TODO(huangwei5): dsn_add_static_library doesnt link libs, need fix
......@@ -8,7 +8,6 @@
#include <ios>
#include <iomanip>
#include <iostream>
#include <unistd.h>
#include <dsn/cpp/service_app.h>
......@@ -21,10 +20,11 @@
#include <map>
#include <memory>
#include <string>
#include <fmt/format.h>
using namespace ::dsn;
static std::string GetHostName()
static std::string get_hostname()
{
char hostname[1024];
......@@ -34,9 +34,20 @@ static std::string GetHostName()
return hostname;
}
static void change_metrics_name(std::string &metrics_name)
// Returns the textual IPv4 address of this host, as produced by inet_ntop()
// from the value of dsn::rpc_address::ipv4_from_network_interface("").
// NOTE(review): the htonl() call implies the helper returns the address in
// host byte order -- confirm against rDSN.
// If the conversion fails in a build where assertions are disabled, an empty
// string is returned.
static std::string get_hostip()
{
    uint32_t ip = dsn::rpc_address::ipv4_from_network_interface("");
    uint32_t ipnet = htonl(ip);
    char buffer[512] = {0};
    // The conversion must NOT live inside assert(): with NDEBUG defined the
    // whole assert expression is compiled out, which would skip inet_ntop()
    // and leave the buffer empty.
    const char *converted = inet_ntop(AF_INET, &ipnet, buffer, sizeof(buffer));
    assert(converted != nullptr);
    (void)converted; // silence the unused-variable warning under NDEBUG
    return buffer;
}
static void format_metrics_name(std::string &metrics_name)
{
replace(metrics_name.begin(), metrics_name.end(), '@', ':');
replace(metrics_name.begin(), metrics_name.end(), '#', ':');
replace(metrics_name.begin(), metrics_name.end(), '.', '_');
replace(metrics_name.begin(), metrics_name.end(), '*', '_');
replace(metrics_name.begin(), metrics_name.end(), '(', '_');
......@@ -65,8 +76,7 @@ pegasus_counter_reporter::pegasus_counter_reporter()
_update_interval_seconds(0),
_last_report_time_ms(0),
_enable_logging(false),
_enable_falcon(false),
_enable_prometheus(false),
_perf_counter_sink(perf_counter_sink_t::INVALID),
_falcon_port(0),
_prometheus_port(0)
{
......@@ -76,17 +86,13 @@ pegasus_counter_reporter::~pegasus_counter_reporter() { stop(); }
void pegasus_counter_reporter::prometheus_initialize()
{
_prometheus_host = dsn_config_get_value_string(
"pegasus.server", "prometheus_host", "127.0.0.1", "prometheus gateway host");
_prometheus_port = (uint16_t)dsn_config_get_value_uint64(
"pegasus.server", "prometheus_port", 9091, "prometheus gateway port");
ddebug("prometheus initialize: host:port(%s:%d)", _prometheus_host.c_str(), _prometheus_port);
const auto &labels = prometheus::Gateway::GetInstanceLabel(GetHostName());
_gateway = std::make_shared<prometheus::Gateway>(
_prometheus_host, std::to_string(_prometheus_port), "pegasus", labels);
_registry = std::make_shared<prometheus::Registry>();
_gateway->RegisterCollectable(_registry);
_exposer = dsn::make_unique<prometheus::Exposer>(
fmt::format("{}:{}", get_hostip().c_str(), _prometheus_port));
_exposer->RegisterCollectable(_registry);
}
void pegasus_counter_reporter::falcon_initialize()
......@@ -148,18 +154,22 @@ void pegasus_counter_reporter::start()
_enable_logging = dsn_config_get_value_bool(
"pegasus.server", "perf_counter_enable_logging", true, "perf_counter_enable_logging");
_enable_falcon = dsn_config_get_value_bool(
"pegasus.server", "perf_counter_enable_falcon", false, "perf_counter_enable_falcon");
_enable_prometheus = dsn_config_get_value_bool("pegasus.server",
"perf_counter_enable_prometheus",
false,
"perf_counter_enable_prometheus");
if (_enable_falcon) {
std::string perf_counter_sink =
dsn_config_get_value_string("pegasus.server", "perf_counter_sink", "", "perf_counter_sink");
if ("prometheus" == perf_counter_sink) {
_perf_counter_sink = perf_counter_sink_t::PROMETHEUS;
} else if ("falcon" == perf_counter_sink) {
_perf_counter_sink = perf_counter_sink_t::FALCON;
} else {
_perf_counter_sink = perf_counter_sink_t::INVALID;
}
if (perf_counter_sink_t::FALCON == _perf_counter_sink) {
falcon_initialize();
}
if (_enable_prometheus) {
if (perf_counter_sink_t::PROMETHEUS == _perf_counter_sink) {
prometheus_initialize();
}
......@@ -178,6 +188,8 @@ void pegasus_counter_reporter::stop()
if (_report_timer != nullptr) {
_report_timer->cancel();
}
_exposer = nullptr;
_registry = nullptr;
}
void pegasus_counter_reporter::update_counters_to_falcon(const std::string &result,
......@@ -207,7 +219,7 @@ void pegasus_counter_reporter::update()
ddebug("%s", oss.str().c_str());
}
if (_enable_falcon) {
if (perf_counter_sink_t::FALCON == _perf_counter_sink) {
std::stringstream oss;
oss << "[";
......@@ -229,20 +241,21 @@ void pegasus_counter_reporter::update()
update_counters_to_falcon(oss.str(), timestamp);
}
if (_enable_prometheus) {
perf_counters::instance().iterate_snapshot([this](
if (perf_counter_sink_t::PROMETHEUS == _perf_counter_sink) {
const std::string hostname = get_hostname();
perf_counters::instance().iterate_snapshot([&hostname, this](
const dsn::perf_counters::counter_snapshot &cs) {
std::string metrics_name = cs.name;
// Prometheus metric names do not support characters like .*()@ ; besides
// letters and digits they only allow ":" and "_",
// so rewrite the name into a form Prometheus accepts.
change_metrics_name(metrics_name);
format_metrics_name(metrics_name);
// split metric_name like "collector_app_pegasus_app_stat_multi_put_qps:1_0_p999" or
// "collector_app_pegasus_app_stat_multi_put_qps:1_0"
// app[0] = "1" which is the app_id
// app[1] = "0" which is the partition_cout
// app[0] = "1" which is the app(app name or app id)
// app[1] = "0" which is the partition_index
// app[2] = "p999" or "" which represent the percent
std::string app[3] = {"", "", ""};
std::list<std::string> lv;
......@@ -256,14 +269,25 @@ void pegasus_counter_reporter::update()
i++;
}
}
/**
* deal with corner case, for example:
* replica*eon.replica*table.level.RPC_RRDB_RRDB_GET.latency(ns)@${table_name}.p999
* in this case, app[0] = app name, app[1] = p999, app[2] = ""
**/
if ("p999" == app[1]) {
app[2] = app[1];
app[1].clear();
}
// create metrics that prometheus support to report data
metrics_name = lv.front() + app[2];
std::map<std::string, prometheus::Family<prometheus::Gauge> *>::iterator it =
_gauge_family_map.find(metrics_name);
if (it == _gauge_family_map.end()) {
auto &add_gauge_family = prometheus::BuildGauge()
.Name(metrics_name)
.Labels({{"service", "pegasus"},
{"host_name", hostname},
{"cluster", _cluster_name},
{"pegasus_job", _app_name},
{"port", std::to_string(_local_port)}})
......@@ -274,13 +298,9 @@ void pegasus_counter_reporter::update()
.first;
}
auto &second_gauge = it->second->Add(
{{"app_id", app[0]}, {"partition_count", app[1]}, {"percent", app[2]}});
auto &second_gauge = it->second->Add({{"app", app[0]}, {"partition", app[1]}});
second_gauge.Set(cs.value);
});
// report data to pushgateway
_gateway->Push();
}
ddebug("update now_ms(%lld), last_report_time_ms(%lld)", now, _last_report_time_ms);
......
......@@ -16,7 +16,7 @@
#include <event2/keyvalq_struct.h>
#include <prometheus/registry.h>
#include <prometheus/gateway.h>
#include <prometheus/exposer.h>
namespace pegasus {
namespace server {
......@@ -35,6 +35,13 @@ struct falcon_metric
DEFINE_JSON_SERIALIZATION(endpoint, metric, timestamp, step, value, counterType, tags)
};
// Selects the single destination that perf counters are reported to.
// The value is derived from the "perf_counter_sink" option of the
// [pegasus.server] config section.
enum class perf_counter_sink_t
{
    FALCON = 0,     // option value "falcon"
    PROMETHEUS = 1, // option value "prometheus"
    INVALID = 2     // any other (or empty) option value: no sink is used
};
class pegasus_counter_reporter : public ::dsn::utils::singleton<pegasus_counter_reporter>
{
public:
......@@ -72,8 +79,7 @@ private:
// perf counter flags
bool _enable_logging;
bool _enable_falcon;
bool _enable_prometheus;
perf_counter_sink_t _perf_counter_sink;
// falcon relates
std::string _falcon_host;
......@@ -82,10 +88,9 @@ private:
falcon_metric _falcon_metric;
// prometheus relates
std::string _prometheus_host;
uint16_t _prometheus_port;
std::shared_ptr<prometheus::Registry> _registry;
std::shared_ptr<prometheus::Gateway> _gateway;
std::unique_ptr<prometheus::Exposer> _exposer;
std::map<std::string, prometheus::Family<prometheus::Gauge> *> _gauge_family_map;
};
}
......
......@@ -289,8 +289,11 @@
perf_counter_update_interval_seconds = 10
perf_counter_enable_logging = false
perf_counter_enable_falcon = false
perf_counter_enable_prometheus = false
# Where the metrics are collected. If no value is given, no sink is used.
# Options:
# - falcon
# - prometheus
perf_counter_sink =
perf_counter_read_capacity_unit_size = 4096
perf_counter_write_capacity_unit_size = 4096
......@@ -298,7 +301,7 @@
falcon_port = 1988
falcon_path = /v1/push
prometheus_host = 127.0.0.1
# The HTTP port exposed to Prometheus for pulling metrics from pegasus server.
prometheus_port = 9091
[pegasus.collector]
......
......@@ -114,6 +114,13 @@
[pegasus.server]
perf_counter_enable_logging = false
# Where the metrics are collected. If no value is given, no sink is used.
# Options:
# - falcon
# - prometheus
perf_counter_sink =
# The HTTP port exposed to Prometheus for pulling metrics from pegasus server.
prometheus_port = @PROMETHEUS_PORT@
[pegasus.collector]
available_detect_app = @APP_NAME@
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册