提交 178de1fe 编写于 作者: N ning

v6 release

上级 87899cbe
......@@ -430,4 +430,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
limitations under the License.
\ No newline at end of file
# Every target in this file is a command rather than a file it produces, so
# declare them all phony; otherwise a stray file named e.g. `build` or `run`
# would make the target look up to date and silently skip its recipe.
.PHONY: all start build build-linux build-alert build-pushgw build-cli pack run run_webapi run_server run_alert run_pushgw
# UTC build timestamp, YYYYmmddHHMMSS. %H is the 24-hour clock; %I (12-hour)
# would give the same stamp for 03:00 and 15:00. Simple assignment (:=) runs
# `date` once at parse time, so every reference sees the same value.
NOW := $(shell date -u '+%Y%m%d%H%M%S')
APP = n9e
SERVER_BIN = $(APP)
ROOT:=$(shell pwd -P)
GIT_COMMIT:=$(shell git --work-tree ${ROOT} rev-parse 'HEAD^{commit}')
_GIT_VERSION:=$(shell git --work-tree ${ROOT} describe --tags --abbrev=14 "${GIT_COMMIT}^{commit}" 2>/dev/null)
# The tag is the part of `git describe` output before the first "-".
# Simple assignment so the shell pipeline runs once, not on every expansion.
TAG:=$(shell echo "${_GIT_VERSION}" | awk -F"-" '{print $$1}')
RELEASE_VERSION:="$(TAG)-$(GIT_COMMIT)"
# RELEASE_ROOT = release
# RELEASE_SERVER = release/${APP}
# GIT_COUNT = $(shell git rev-list --all --count)
# GIT_HASH = $(shell git rev-parse --short HEAD)
# RELEASE_TAG = $(RELEASE_VERSION).$(GIT_COUNT).$(GIT_HASH)
all: build
build:
go build -ldflags "-w -s -X github.com/didi/nightingale/v5/src/pkg/version.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
build-linux:
GOOS=linux GOARCH=amd64 go build -ldflags "-w -s -X github.com/didi/nightingale/v5/src/pkg/version.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
# start:
# @go run -ldflags "-X main.VERSION=$(RELEASE_TAG)" ./cmd/${APP}/main.go web -c ./configs/config.toml -m ./configs/model.conf --menu ./configs/menu.yaml
run_webapi:
nohup ./n9e webapi > webapi.log 2>&1 &
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e ./cmd/center/main.go
run_server:
nohup ./n9e server > server.log 2>&1 &
build-alert:
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-alert ./cmd/alert/main.go
# swagger:
# @swag init --parseDependency --generalInfo ./cmd/${APP}/main.go --output ./internal/app/swagger
build-pushgw:
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-pushgw ./cmd/pushgw/main.go
# wire:
# @wire gen ./internal/app
build-cli:
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-cli ./cmd/cli/main.go
# test:
# cd ./internal/app/test && go test -v
run:
nohup ./n9e > n9e.log 2>&1 &
# clean:
# rm -rf data release $(SERVER_BIN) internal/app/test/data cmd/${APP}/data
run_alert:
nohup ./n9e-alert > n9e-alert.log 2>&1 &
pack: build
rm -rf $(APP)-$(RELEASE_VERSION).tar.gz
tar -zcvf $(APP)-$(RELEASE_VERSION).tar.gz docker etc $(SERVER_BIN) pub/font pub/index.html pub/assets pub/image
run_pushgw:
nohup ./n9e-pushgw > n9e-pushgw.log 2>&1 &
\ No newline at end of file
......@@ -91,7 +91,7 @@
- 补充和完善文档 => [n9e.github.io](https://n9e.github.io/)
- 分享您在使用夜莺监控过程中的最佳实践和经验心得 => [文章分享](https://n9e.github.io/docs/prologue/share/)
- 提交产品建议 => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/didi/nightingale/pulls)
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/ccfos/nightingale/pulls)
**尊重、认可和记录每一位贡献者的工作**是夜莺开源社区的第一指导原则,我们提倡**高效的提问**,这既是对开发者时间的尊重,也是对整个社区知识沉淀的贡献:
- 提问之前请先查阅 [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
......@@ -112,7 +112,7 @@
</a>
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
[Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)
## Contact Us
推荐您关注夜莺监控公众号,及时获取相关产品和社区动态:
......
......@@ -49,7 +49,7 @@ Nightingale is an cloud-native monitoring system by All-In-On design, support en
<img src="doc/img/install-vm.png" width="680">
## Contact us and feedback questions
- We recommend that you use [github issue](https://github.com/didi/nightingale/issues) as the preferred channel for issue feedback and requirement submission;
- We recommend that you use [github issue](https://github.com/ccfos/nightingale/issues) as the preferred channel for issue feedback and requirement submission;
- You can join our WeChat group
<img src="doc/img/n9e-vx-new.png" width="180">
......@@ -57,12 +57,12 @@ Nightingale is an cloud-native monitoring system by All-In-On design, support en
## Contributing
We welcome your participation in the Nightingale open source project and open source community in a variety of ways:
- Feedback on problems and bugs => [github issue](https://github.com/didi/nightingale/issues)
- Feedback on problems and bugs => [github issue](https://github.com/ccfos/nightingale/issues)
- Additional and improved documentation => [n9e.github.io](https://n9e.github.io/)
- Share your best practices and insights on using Nightingale => [User Story](https://github.com/didi/nightingale/issues/897)
- Join our community events => [Nightingale wechat group](https://s3-gz01.didistatic.com/n9e-pub/image/n9e-wx.png)
- Submit code to make Nightingale better =>[github PR](https://github.com/didi/nightingale/pulls)
- Share your best practices and insights on using Nightingale => [User Story](https://github.com/ccfos/nightingale/issues/897)
- Join our community events => [Nightingale wechat group](https://s3-gz01.didistatic.com/n9e-pub/image/n9e-wx.png)
- Submit code to make Nightingale better => [github PR](https://github.com/ccfos/nightingale/pulls)
## License
Nightingale with [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE) open source license.
Nightingale is released under the [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE) open source license.
package aconf
import (
"path"
"github.com/toolkits/pkg/runner"
)
// Alert is the configuration of the alert module. It groups the
// per-component sections; PreCheck fills in defaults for
// Alerting.TemplatesDir, Alerting.Timeout and Heartbeat.Interval.
type Alert struct {
// NOTE(review): unit and consumer of EngineDelay are not visible here — confirm.
EngineDelay int64
Heartbeat HeartbeatConfig
Alerting Alerting
SMTP SMTPConfig
Ibex Ibex
}
// SMTPConfig is the Alert.SMTP section; alert.Start hands it to
// sender.StartEmailSender.
// NOTE(review): the exact meaning of Batch and the expected format of From
// are defined by the email sender, which is not visible here — confirm.
type SMTPConfig struct {
Host string
Port int
User string
Pass string
From string
InsecureSkipVerify bool
Batch int
}
// HeartbeatConfig is the Alert.Heartbeat section; it is passed to
// prom.NewPromClient and naming.NewNaming.
type HeartbeatConfig struct {
IP string
// Interval is set to 1000 by Alert.PreCheck when left at zero.
// NOTE(review): presumably milliseconds — confirm against the consumer.
Interval int64
Endpoint string
ClusterName string
}
// Alerting is the Alert.Alerting section, consumed by the dispatch package
// (dispatch.NewDispatch and dispatch.NewConsumer receive it).
type Alerting struct {
// Timeout is set to 30000 by Alert.PreCheck when left at zero.
// NOTE(review): presumably milliseconds — confirm against the consumer.
Timeout int64
// TemplatesDir defaults to "<cwd>/etc/template" in Alert.PreCheck when empty.
TemplatesDir string
// NotifyConcurrency sizes the semaphore created in Consumer.LoopConsume.
NotifyConcurrency int
}
// CallPlugin describes an optional notification plugin.
// NOTE(review): no use of this type is visible in this change; field
// semantics (PluginPath, Caller) need confirming against its consumer.
type CallPlugin struct {
Enable bool
PluginPath string
Caller string
}
// RedisPub describes optional publishing of events to Redis.
// NOTE(review): no use of this type is visible in this change; how
// ChannelPrefix and ChannelKey are combined needs confirming.
type RedisPub struct {
Enable bool
ChannelPrefix string
ChannelKey string
}
// Ibex is the Alert.Ibex section; it is passed to dispatch.NewDispatch and
// from there to sender.SendCallbacks.
// NOTE(review): the unit of Timeout is not visible here — confirm.
type Ibex struct {
Address string
BasicAuthUser string
BasicAuthPass string
Timeout int64
}
// PreCheck fills in defaults for settings that were left at their zero
// value: the templates directory becomes "<cwd>/etc/template", the alerting
// timeout becomes 30000 and the heartbeat interval becomes 1000.
func (a *Alert) PreCheck() {
	alerting := &a.Alerting
	if alerting.TemplatesDir == "" {
		alerting.TemplatesDir = path.Join(runner.Cwd, "etc", "template")
	}
	if alerting.Timeout == 0 {
		alerting.Timeout = 30000
	}

	if heartbeat := &a.Heartbeat; heartbeat.Interval == 0 {
		heartbeat.Interval = 1000
	}
}
package alert
import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/eval"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/alert/record"
"github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
)
// Initialize boots the standalone alert service: it loads the configuration
// from configDir (cryptoKey is forwarded to conf.InitConfig), initialises
// logging and the database, builds the in-memory caches, starts the alert
// engine via Start and finally brings up the HTTP server.
//
// On success it returns a cleanup function that shuts the service down.
// On failure it returns a nil cleanup function and the error; anything that
// was already initialised is released before returning.
func Initialize(configDir string, cryptoKey string) (func(), error) {
	config, err := conf.InitConfig(configDir, cryptoKey)
	if err != nil {
		// %w keeps the message identical while letting callers unwrap the cause.
		return nil, fmt.Errorf("failed to init config: %w", err)
	}

	logxClean, err := logx.Init(config.Log)
	if err != nil {
		return nil, err
	}

	db, err := storage.New(config.DB)
	if err != nil {
		// The logger is already up and the caller gets no cleanup function
		// on error, so release it here instead of leaking it.
		logxClean()
		return nil, err
	}
	ctx := ctx.NewContext(context.Background(), db)

	syncStats := memsto.NewSyncStats()
	alertStats := astats.NewSyncStats()

	targetCache := memsto.NewTargetCache(ctx, syncStats)
	busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
	alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
	alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)

	promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)

	externalProcessors := process.NewExternalProcessors()

	Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, ctx, promClients)

	r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
	rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
	rt.Config(r)

	httpClean := httpx.Init(config.HTTP, r)

	// Tear down in reverse order of construction: stop the HTTP server
	// first so anything it logs while shutting down still reaches the logger.
	return func() {
		httpClean()
		logxClean()
	}, nil
}
// Start wires up the alert engine from already-built shared caches and
// clients: it creates the remaining caches, the recording-rule and
// alert-rule schedulers, the dispatcher and its consumer, and launches the
// background goroutines. It returns once everything is started.
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
	alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, ctx *ctx.Context, promClients *prom.PromClientMap) {
	// Caches that are not shared with the caller.
	users := memsto.NewUserCache(ctx, syncStats)
	userGroups := memsto.NewUserGroupCache(ctx, syncStats)
	subscribes := memsto.NewAlertSubscribeCache(ctx, syncStats)
	recordingRules := memsto.NewRecordingRuleCache(ctx, syncStats)
	webhooks := memsto.NewWebhookCache(ctx)
	scripts := memsto.NewNotifyScript(ctx)

	go models.InitNotifyConfig(ctx, alertc.Alerting.TemplatesDir)

	// Schedulers for recording rules and alert rules.
	namer := naming.NewNaming(ctx, alertc.Heartbeat)
	remoteWriters := writer.NewWriters(pushgwc)
	record.NewScheduler(alertc, recordingRules, promClients, remoteWriters, alertStats)
	eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, busiGroupCache, alertMuteCache, promClients, namer, ctx, alertStats)

	// Event dispatching and the consumer that feeds it.
	dispatcher := dispatch.NewDispatch(alertRuleCache, users, userGroups, subscribes, targetCache, webhooks, scripts, alertc.Alerting, alertc.Ibex, ctx)
	eventConsumer := dispatch.NewConsumer(alertc.Alerting, ctx, dispatcher)

	// Long-running background loops.
	go dispatcher.ReloadTpls()
	go eventConsumer.LoopConsume()
	go queue.ReportQueueSize(alertStats)
	go sender.StartEmailSender(alertc.SMTP)
}
package stat
package astats
import (
"github.com/prometheus/client_golang/prometheus"
......@@ -6,28 +6,21 @@ import (
const (
namespace = "n9e"
subsystem = "server"
subsystem = "alert"
)
var (
// 各个周期性任务的执行耗时
GaugeCronDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "cron_duration",
Help: "Cron method use duration, unit: ms.",
}, []string{"name"})
// 从数据库同步数据的时候,同步的条数
GaugeSyncNumber = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "cron_sync_number",
Help: "Cron sync number.",
}, []string{"name"})
// Stats bundles the Prometheus collectors of the alert module. NewSyncStats
// creates them, registers them and returns them in this struct.
type Stats struct {
// Total number of samples received through the receiving interfaces
// (metric "samples_received_total").
CounterSampleTotal *prometheus.CounterVec
// Total number of alerts generated (metric "alerts_total").
CounterAlertsTotal *prometheus.CounterVec
// Length of the in-memory alert event queue (metric "alert_queue_size").
GaugeAlertQueueSize prometheus.Gauge
// Length of each sample forwarding queue (metric "sample_queue_size").
GaugeSampleQueueSize *prometheus.GaugeVec
// Latency of important requests, such as those receiving data.
RequestDuration *prometheus.HistogramVec
// Latency of forwarding samples to the backend TSDB, in seconds.
ForwardDuration *prometheus.HistogramVec
}
func NewSyncStats() *Stats {
// 从各个接收接口接收到的监控数据总量
CounterSampleTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
CounterSampleTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "samples_received_total",
......@@ -35,7 +28,7 @@ var (
}, []string{"cluster", "channel"})
// 产生的告警总量
CounterAlertsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
CounterAlertsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "alerts_total",
......@@ -43,7 +36,7 @@ var (
}, []string{"cluster"})
// 内存中的告警事件队列的长度
GaugeAlertQueueSize = prometheus.NewGauge(prometheus.GaugeOpts{
GaugeAlertQueueSize := prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "alert_queue_size",
......@@ -51,7 +44,7 @@ var (
})
// 数据转发队列,各个队列的长度
GaugeSampleQueueSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
GaugeSampleQueueSize := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sample_queue_size",
......@@ -59,7 +52,7 @@ var (
}, []string{"cluster", "channel_number"})
// 一些重要的请求,比如接收数据的请求,应该统计一下延迟情况
RequestDuration = prometheus.NewHistogramVec(
RequestDuration := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
......@@ -70,7 +63,7 @@ var (
)
// 发往后端TSDB,延迟如何
ForwardDuration = prometheus.NewHistogramVec(
ForwardDuration := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
......@@ -79,13 +72,8 @@ var (
Help: "Forward samples to TSDB. latencies in seconds.",
}, []string{"cluster", "channel_number"},
)
)
func Init() {
// Register the summary and the histogram with Prometheus's default registry.
prometheus.MustRegister(
GaugeCronDuration,
GaugeSyncNumber,
CounterSampleTotal,
CounterAlertsTotal,
GaugeAlertQueueSize,
......@@ -93,4 +81,13 @@ func Init() {
RequestDuration,
ForwardDuration,
)
return &Stats{
CounterSampleTotal: CounterSampleTotal,
CounterAlertsTotal: CounterAlertsTotal,
GaugeAlertQueueSize: GaugeAlertQueueSize,
GaugeSampleQueueSize: GaugeSampleQueueSize,
RequestDuration: RequestDuration,
ForwardDuration: ForwardDuration,
}
}
package conv
package common
import (
"fmt"
......@@ -8,20 +8,36 @@ import (
"github.com/prometheus/common/model"
)
type Vector struct {
// AnomalyPoint is a single evaluated data point: a value at a timestamp,
// identified by a key and a label set, together with a severity.
type AnomalyPoint struct {
// Key identifies the series; ConvertAnomalyPoints fills it with the
// metric's string form.
Key string `json:"key"`
Labels model.Metric `json:"labels"`
// Timestamp is filled from Unix() in ConvertAnomalyPoints, i.e. seconds.
Timestamp int64 `json:"timestamp"`
Value float64 `json:"value"`
// NOTE(review): the meaning of the severity levels is not visible here.
Severity int `json:"severity"`
}
func (v *Vector) ReadableValue() string {
// NewAnomalyPoint builds an AnomalyPoint from plain values. The labels map
// is copied into a model.Metric, and the metric-name label is then set to
// key, overriding any entry of that name supplied in labels.
func NewAnomalyPoint(key string, labels map[string]string, ts int64, value float64, severity int) AnomalyPoint {
	metric := model.Metric{}
	for name, val := range labels {
		metric[model.LabelName(name)] = model.LabelValue(val)
	}
	// Set last so that key always wins over a caller-supplied name label.
	metric[model.MetricNameLabel] = model.LabelValue(key)

	point := AnomalyPoint{Key: key, Timestamp: ts, Value: value, Severity: severity}
	point.Labels = metric
	return point
}
// ReadableValue formats Value with five decimal places and then strips
// trailing zeros and a trailing decimal point, e.g. 1.50000 -> "1.5" and
// 3.00000 -> "3".
func (v *AnomalyPoint) ReadableValue() string {
	fixed := fmt.Sprintf("%.5f", v.Value)
	return strings.TrimRight(strings.TrimRight(fixed, "0"), ".")
}
func ConvertVectors(value model.Value) (lst []Vector) {
func ConvertAnomalyPoints(value model.Value) (lst []AnomalyPoint) {
if value == nil {
return
}
......@@ -38,7 +54,7 @@ func ConvertVectors(value model.Value) (lst []Vector) {
continue
}
lst = append(lst, Vector{
lst = append(lst, AnomalyPoint{
Key: item.Metric.String(),
Timestamp: item.Timestamp.Unix(),
Value: float64(item.Value),
......@@ -62,7 +78,7 @@ func ConvertVectors(value model.Value) (lst []Vector) {
continue
}
lst = append(lst, Vector{
lst = append(lst, AnomalyPoint{
Key: item.Metric.String(),
Labels: item.Metric,
Timestamp: last.Timestamp.Unix(),
......@@ -79,7 +95,7 @@ func ConvertVectors(value model.Value) (lst []Vector) {
return
}
lst = append(lst, Vector{
lst = append(lst, AnomalyPoint{
Key: "{}",
Timestamp: item.Timestamp.Unix(),
Value: float64(item.Value),
......
package common
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
)
// RuleKey returns the identifier "alert-<datasourceId>-<id>" for the given
// datasource id and rule id.
func RuleKey(datasourceId, id int64) string {
	return "alert-" + fmt.Sprint(datasourceId) + "-" + fmt.Sprint(id)
}
// MatchTags reports whether the event's tags satisfy every filter in itags.
// A filter whose key is absent from eventTagsMap fails the match; with no
// filters the result is true.
func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
	for i := range itags {
		tagValue, present := eventTagsMap[itags[i].Key]
		if !present || !matchTag(tagValue, itags[i]) {
			return false
		}
	}
	return true
}
// matchTag evaluates a single filter against a tag value. Supported
// operators are "==", "!=", "in", "not in", "=~" and "!~"; any other
// operator never matches.
func matchTag(value string, filter models.TagFilter) bool {
	// Each operator pair shares one test; the second comparison flips the
	// result for the negated form.
	switch op := filter.Func; op {
	case "==", "!=":
		return (filter.Value == value) == (op == "==")
	case "in", "not in":
		_, found := filter.Vset[value]
		return found == (op == "in")
	case "=~", "!~":
		return filter.Regexp.MatchString(value) == (op == "=~")
	default:
		// unexpected operator
		return false
	}
}
package engine
package dispatch
import (
"context"
"fmt"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
)
func loopConsume(ctx context.Context) {
sema := semaphore.NewSemaphore(config.C.Alerting.NotifyConcurrency)
// Consumer drains the alert event queue and hands each event to the
// dispatcher.
type Consumer struct {
// alerting supplies NotifyConcurrency, which bounds concurrent consumption.
alerting aconf.Alerting
// ctx is passed to the models calls that persist events.
ctx *ctx.Context

// dispatch receives each consumed event via HandleEventNotify.
dispatch *Dispatch
}
// NewConsumer creates a Consumer from the alerting settings, the shared
// context and the dispatcher that events are forwarded to.
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch) *Consumer {
	c := new(Consumer)
	c.alerting = alerting
	c.ctx = ctx
	c.dispatch = dispatch
	return c
}
func (e *Consumer) LoopConsume() {
sema := semaphore.NewSemaphore(e.alerting.NotifyConcurrency)
duration := time.Duration(100) * time.Millisecond
for {
events := EventQueue.PopBackBy(100)
events := queue.EventQueue.PopBackBy(100)
if len(events) == 0 {
time.Sleep(duration)
continue
}
consume(events, sema)
e.consume(events, sema)
}
}
func consume(events []interface{}, sema *semaphore.Semaphore) {
func (e *Consumer) consume(events []interface{}, sema *semaphore.Semaphore) {
for i := range events {
if events[i] == nil {
continue
......@@ -37,12 +52,12 @@ func consume(events []interface{}, sema *semaphore.Semaphore) {
sema.Acquire()
go func(event *models.AlertCurEvent) {
defer sema.Release()
consumeOne(event)
e.consumeOne(event)
}(event)
}
}
func consumeOne(event *models.AlertCurEvent) {
func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
LogEvent(event, "consume")
if err := event.ParseRule("rule_name"); err != nil {
......@@ -53,26 +68,32 @@ func consumeOne(event *models.AlertCurEvent) {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
persist(event)
if err := event.ParseRule("annotations"); err != nil {
event.Annotations = fmt.Sprintf("failed to parse rule note: %v", err)
}
logger.Info("event_persist: event.Annotations", event.Annotations)
e.persist(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
HandleEventNotify(event, false)
e.dispatch.HandleEventNotify(event, false)
}
func persist(event *models.AlertCurEvent) {
has, err := models.AlertCurEventExists("hash=?", event.Hash)
func (e *Consumer) persist(event *models.AlertCurEvent) {
has, err := models.AlertCurEventExists(e.ctx, "hash=?", event.Hash)
if err != nil {
logger.Errorf("event_persist_check_exists_fail: %v rule_id=%d hash=%s", err, event.RuleId, event.Hash)
return
}
his := event.ToHis()
his := event.ToHis(e.ctx)
// 不管是告警还是恢复,全量告警里都要记录
if err := his.Add(); err != nil {
if err := his.Add(e.ctx); err != nil {
logger.Errorf(
"event_persist_his_fail: %v rule_id=%d cluster:%s hash=%s tags=%v timestamp=%d value=%s",
err,
......@@ -87,7 +108,7 @@ func persist(event *models.AlertCurEvent) {
if has {
// 活跃告警表中有记录,删之
err = models.AlertCurEventDelByHash(event.Hash)
err = models.AlertCurEventDelByHash(e.ctx, event.Hash)
if err != nil {
logger.Errorf("event_del_cur_fail: %v hash=%s", err, event.Hash)
return
......@@ -98,7 +119,7 @@ func persist(event *models.AlertCurEvent) {
// use his id as cur id
event.Id = his.Id
if event.Id > 0 {
if err := event.Add(); err != nil {
if err := event.Add(e.ctx); err != nil {
logger.Errorf(
"event_persist_cur_fail: %v rule_id=%d cluster:%s hash=%s tags=%v timestamp=%d value=%s",
err,
......@@ -124,7 +145,7 @@ func persist(event *models.AlertCurEvent) {
// use his id as cur id
event.Id = his.Id
if event.Id > 0 {
if err := event.Add(); err != nil {
if err := event.Add(e.ctx); err != nil {
logger.Errorf(
"event_persist_cur_fail: %v rule_id=%d cluster:%s hash=%s tags=%v timestamp=%d value=%s",
err,
......@@ -138,35 +159,3 @@ func persist(event *models.AlertCurEvent) {
}
}
}
// fillUsers resolves the event's notify groups and users for alerting.
// Group ids that fail to parse as integers are skipped. NotifyGroupsObj is
// filled from the user-group cache and NotifyUsersObj with the de-duplicated
// members of those groups.
func fillUsers(e *models.AlertCurEvent) {
	groupIds := make([]int64, 0, len(e.NotifyGroupsJSON))
	for _, raw := range e.NotifyGroupsJSON {
		if gid, err := strconv.ParseInt(raw, 10, 64); err == nil {
			groupIds = append(groupIds, gid)
		}
	}

	e.NotifyGroupsObj = memsto.UserGroupCache.GetByUserGroupIds(groupIds)

	// A set, so a user who belongs to several groups is looked up once.
	userIds := make(map[int64]struct{})
	for _, group := range e.NotifyGroupsObj {
		for _, uid := range group.UserIds {
			userIds[uid] = struct{}{}
		}
	}

	e.NotifyUsersObj = memsto.UserCache.GetByUserIds(mapKeys(userIds))
}
// mapKeys returns the keys of m as a slice, in unspecified order. The
// result is never nil, even for an empty map.
func mapKeys(m map[int64]struct{}) []int64 {
	keys := make([]int64, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	return keys
}
package engine
package dispatch
import (
"bytes"
"encoding/json"
"html/template"
"strconv"
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/common/sender"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/toolkits/pkg/logger"
)
var (
rwLock sync.RWMutex
type Dispatch struct {
alertRuleCache *memsto.AlertRuleCacheType
userCache *memsto.UserCacheType
userGroupCache *memsto.UserGroupCacheType
alertSubscribeCache *memsto.AlertSubscribeCacheType
targetCache *memsto.TargetCacheType
webhookCache *memsto.WebhookCacheType
notifyScriptCache *memsto.NotifyScriptCacheType
alerting aconf.Alerting
ibex aconf.Ibex
senders map[string]sender.Sender
tpls map[string]*template.Template
Senders map[string]sender.Sender
// 处理事件到subscription关系,处理的subscription用OrMerge进行合并
routers = []Router{GroupRouter, GlobalWebhookRouter, EventCallbacksRouter}
// 额外去掉一些订阅,处理的subscription用AndMerge进行合并, 如设置 channel=false,合并后不通过这个channel发送
// 如果实现了相关Router,可以添加到interceptors中
interceptors []Router
ctx *ctx.Context
// 额外的订阅event逻辑处理
subscribeRouters = []Router{GroupRouter}
subscribeInterceptors []Router
)
RwLock sync.RWMutex
}
// NewDispatch creates a Dispatch holding the given caches, settings and
// context. The senders and tpls maps start out empty.
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
	alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, webhookCache *memsto.WebhookCacheType, notifyScriptCache *memsto.NotifyScriptCacheType,
	alerting aconf.Alerting, ibex aconf.Ibex, ctx *ctx.Context) *Dispatch {
	return &Dispatch{
		// caches
		alertRuleCache:      alertRuleCache,
		userCache:           userCache,
		userGroupCache:      userGroupCache,
		alertSubscribeCache: alertSubscribeCache,
		targetCache:         targetCache,
		webhookCache:        webhookCache,
		notifyScriptCache:   notifyScriptCache,

		// settings
		alerting: alerting,
		ibex:     ibex,

		// initially empty
		senders: map[string]sender.Sender{},
		tpls:    map[string]*template.Template{},

		ctx: ctx,
	}
}
// ReloadTpls loads the notification templates once immediately and then
// reloads them every 9 seconds. Failures are logged and the loop keeps
// going, so the function never returns; alert.Start launches it with `go`.
// The error return value exists only to keep the signature stable.
func (e *Dispatch) ReloadTpls() error {
	if err := e.relaodTpls(); err != nil {
		// Errorf, not Error: the message contains a %v verb that must be
		// expanded rather than printed literally.
		logger.Errorf("failed to reload tpls: %v", err)
	}

	duration := time.Duration(9000) * time.Millisecond
	for {
		time.Sleep(duration)
		if err := e.relaodTpls(); err != nil {
			logger.Warningf("failed to reload tpls: %v", err)
		}
	}
}
func reloadTpls() error {
tmpTpls, err := config.C.Alerting.ListTpls()
func (e *Dispatch) relaodTpls() error {
tmpTpls, err := models.ListTpls(e.ctx)
if err != nil {
return err
}
......@@ -44,91 +92,96 @@ func reloadTpls() error {
models.Telegram: sender.NewSender(models.Telegram, tmpTpls),
}
rwLock.Lock()
tpls = tmpTpls
Senders = senders
rwLock.Unlock()
e.RwLock.Lock()
e.tpls = tmpTpls
e.senders = senders
e.RwLock.Unlock()
return nil
}
// HandleEventNotify 处理event事件的主逻辑
// event: 告警/恢复事件
// isSubscribe: 告警事件是否由subscribe的配置产生
func HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
rule := memsto.AlertRuleCache.Get(event.RuleId)
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
rule := e.alertRuleCache.Get(event.RuleId)
if rule == nil {
return
}
fillUsers(event)
fillUsers(event, e.userCache, e.userGroupCache)
var (
handlers []Router
interceptorHandlers []Router
// 处理事件到 notifyTarget 关系,处理的notifyTarget用OrMerge进行合并
handlers []NotifyTargetDispatch
// 额外去掉一些订阅,处理的notifyTarget用AndMerge进行合并, 如设置 channel=false,合并后不通过这个channel发送
// 如果实现了相关 Dispatch,可以添加到interceptors中
interceptorHandlers []NotifyTargetDispatch
)
if isSubscribe {
handlers = subscribeRouters
interceptorHandlers = subscribeInterceptors
handlers = []NotifyTargetDispatch{NotifyGroupDispatch, EventCallbacksDispatch}
} else {
handlers = routers
interceptorHandlers = interceptors
handlers = []NotifyTargetDispatch{NotifyGroupDispatch, GlobalWebhookDispatch, EventCallbacksDispatch}
}
subscription := NewSubscription()
notifyTarget := NewNotifyTarget()
// 处理订阅关系使用OrMerge
for _, handler := range handlers {
subscription.OrMerge(handler(rule, event, subscription))
notifyTarget.OrMerge(handler(rule, event, notifyTarget, e))
}
// 处理移除订阅关系的逻辑,比如员工离职,临时静默某个通道的策略等
for _, handler := range interceptorHandlers {
subscription.AndMerge(handler(rule, event, subscription))
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
}
// 处理事件发送,这里用一个goroutine处理一个event的所有发送事件
go Send(rule, event, subscription, isSubscribe)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event,则需要处理订阅规则的event
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
if !isSubscribe {
handleSubs(event)
e.handleSubs(event)
}
}
func handleSubs(event *models.AlertCurEvent) {
func (e *Dispatch) handleSubs(event *models.AlertCurEvent) {
// handle alert subscribes
subscribes := make([]*models.AlertSubscribe, 0)
// rule specific subscribes
if subs, has := memsto.AlertSubscribeCache.Get(event.RuleId); has {
if subs, has := e.alertSubscribeCache.Get(event.RuleId); has {
subscribes = append(subscribes, subs...)
}
// global subscribes
if subs, has := memsto.AlertSubscribeCache.Get(0); has {
if subs, has := e.alertSubscribeCache.Get(0); has {
subscribes = append(subscribes, subs...)
}
for _, sub := range subscribes {
handleSub(sub, *event)
e.handleSub(sub, *event)
}
}
// handleSub 处理订阅规则的event,注意这里event要使用值传递,因为后面会修改event的状态
func handleSub(sub *models.AlertSubscribe, event models.AlertCurEvent) {
if sub.IsDisabled() || !sub.MatchCluster(event.Cluster) {
func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEvent) {
if sub.IsDisabled() || !sub.MatchCluster(event.DatasourceId) {
return
}
if !matchTags(event.TagsMap, sub.ITags) {
if !common.MatchTags(event.TagsMap, sub.ITags) {
return
}
if sub.ForDuration > (event.TriggerTime - event.FirstTriggerTime) {
return
}
sub.ModifyEvent(&event)
LogEvent(&event, "subscribe")
HandleEventNotify(&event, true)
e.HandleEventNotify(&event, true)
}
func Send(rule *models.AlertRule, event *models.AlertCurEvent, subscription *Subscription, isSubscribe bool) {
for channel, uids := range subscription.ToChannelUserMap() {
ctx := sender.BuildMessageContext(rule, event, uids)
rwLock.RLock()
s := Senders[channel]
rwLock.RUnlock()
func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, notifyTarget *NotifyTarget, isSubscribe bool) {
for channel, uids := range notifyTarget.ToChannelUserMap() {
ctx := sender.BuildMessageContext(rule, event, uids, e.userCache)
e.RwLock.RLock()
s := e.senders[channel]
e.RwLock.RUnlock()
if s == nil {
logger.Warningf("no sender for channel: %s", channel)
continue
......@@ -137,20 +190,13 @@ func Send(rule *models.AlertRule, event *models.AlertCurEvent, subscription *Sub
}
// handle event callbacks
sender.SendCallbacks(subscription.ToCallbackList(), event)
sender.SendCallbacks(e.ctx, notifyTarget.ToCallbackList(), event, e.targetCache, e.ibex)
// handle global webhooks
sender.SendWebhooks(subscription.ToWebhookList(), event)
noticeBytes := genNoticeBytes(event)
sender.SendWebhooks(notifyTarget.ToWebhookList(), event)
// handle plugin call
go sender.MayPluginNotify(noticeBytes)
if !isSubscribe {
// handle redis pub
sender.PublishToRedis(event.Cluster, noticeBytes)
}
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyScriptCache)
}
type Notice struct {
......@@ -158,13 +204,13 @@ type Notice struct {
Tpls map[string]string `json:"tpls"`
}
func genNoticeBytes(event *models.AlertCurEvent) []byte {
func (e *Dispatch) genNoticeBytes(event *models.AlertCurEvent) []byte {
// build notice body with templates
ntpls := make(map[string]string)
rwLock.RLock()
defer rwLock.RUnlock()
for filename, tpl := range tpls {
e.RwLock.RLock()
defer e.RwLock.RUnlock()
for filename, tpl := range e.tpls {
var body bytes.Buffer
if err := tpl.Execute(&body, event); err != nil {
ntpls[filename] = err.Error()
......@@ -182,3 +228,35 @@ func genNoticeBytes(event *models.AlertCurEvent) []byte {
return stdinBytes
}
// fillUsers resolves the event's notify groups and users for alerting.
// Group ids that fail to parse as integers are skipped. NotifyGroupsObj is
// filled from ugc and NotifyUsersObj from uc with the de-duplicated members
// of those groups.
func fillUsers(ce *models.AlertCurEvent, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) {
	groupIds := make([]int64, 0, len(ce.NotifyGroupsJSON))
	for _, raw := range ce.NotifyGroupsJSON {
		if gid, err := strconv.ParseInt(raw, 10, 64); err == nil {
			groupIds = append(groupIds, gid)
		}
	}

	ce.NotifyGroupsObj = ugc.GetByUserGroupIds(groupIds)

	// A set, so a user who belongs to several groups is looked up once.
	userIds := make(map[int64]struct{})
	for _, group := range ce.NotifyGroupsObj {
		for _, uid := range group.UserIds {
			userIds[uid] = struct{}{}
		}
	}

	ce.NotifyUsersObj = uc.GetByUserIds(mapKeys(userIds))
}
// mapKeys returns the keys of m as a slice, in unspecified order. The
// result is never nil, even for an empty map.
func mapKeys(m map[int64]struct{}) []int64 {
	keys := make([]int64, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	return keys
}
package engine
package dispatch
import (
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/didi/nightingale/v5/src/models"
"github.com/toolkits/pkg/logger"
)
func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
......
package dispatch
// NotifyChannels maps a channel key to whether notifications should be sent
// through that channel.
type NotifyChannels map[string]bool

// NewNotifyChannels returns a NotifyChannels with every listed channel
// enabled.
func NewNotifyChannels(channels []string) NotifyChannels {
	enabled := make(NotifyChannels, len(channels))
	for i := range channels {
		enabled[channels[i]] = true
	}
	return enabled
}

// OrMerge merges other into nc in place: a channel present in both ends up
// enabled if either side enables it; a channel only in other is copied.
func (nc NotifyChannels) OrMerge(other NotifyChannels) {
	either := func(mine, theirs bool) bool { return mine || theirs }
	nc.merge(other, either)
}

// AndMerge merges other into nc in place: a channel present in both stays
// enabled only if both sides enable it; a channel only in other is copied.
func (nc NotifyChannels) AndMerge(other NotifyChannels) {
	both := func(mine, theirs bool) bool { return mine && theirs }
	nc.merge(other, both)
}

// merge folds other into nc in place. Keys present in both are combined
// with f(current, incoming); keys only in other are copied as they are;
// keys only in nc are left untouched. A nil other is a no-op because
// ranging over a nil map visits nothing.
func (nc NotifyChannels) merge(other NotifyChannels, f func(bool, bool) bool) {
	for key, incoming := range other {
		current, exists := nc[key]
		if !exists {
			nc[key] = incoming
			continue
		}
		nc[key] = f(current, incoming)
	}
}
package engine
package dispatch
import (
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
)
// NotifyChannels channelKey -> bool
type NotifyChannels map[string]bool
func NewNotifyChannels(channels []string) NotifyChannels {
nc := make(NotifyChannels)
for _, ch := range channels {
nc[ch] = true
}
return nc
}
"strconv"
func (nc NotifyChannels) OrMerge(other NotifyChannels) {
nc.merge(other, func(a, b bool) bool { return a || b })
}
func (nc NotifyChannels) AndMerge(other NotifyChannels) {
nc.merge(other, func(a, b bool) bool { return a && b })
}
func (nc NotifyChannels) merge(other NotifyChannels, f func(bool, bool) bool) {
if other == nil {
return
}
for k, v := range other {
if curV, has := nc[k]; has {
nc[k] = f(curV, v)
} else {
nc[k] = v
}
}
}
"github.com/ccfos/nightingale/v6/models"
)
// Subscription 维护所有需要发送的用户-通道/回调/钩子信息,用map维护的数据结构具有去重功能
type Subscription struct {
// NotifyTarget 维护所有需要发送的目标 用户-通道/回调/钩子信息,用map维护的数据结构具有去重功能
type NotifyTarget struct {
userMap map[int64]NotifyChannels
webhooks map[string]config.Webhook
webhooks map[string]*models.Webhook
callbacks map[string]struct{}
}
func NewSubscription() *Subscription {
return &Subscription{
func NewNotifyTarget() *NotifyTarget {
return &NotifyTarget{
userMap: make(map[int64]NotifyChannels),
webhooks: make(map[string]config.Webhook),
webhooks: make(map[string]*models.Webhook),
callbacks: make(map[string]struct{}),
}
}
// NewSubscriptionFromUsers builds a Subscription from the users' token
// configuration; it is used by notifyMaintainer. For each non-nil user,
// every channel with a non-empty token is enabled for that user.
func NewSubscriptionFromUsers(users []*models.User) *Subscription {
	sub := NewSubscription()
	for _, user := range users {
		if user == nil {
			continue
		}
		for ch, tok := range user.ExtractAllToken() {
			if tok == "" {
				continue
			}
			// Create the user's channel map lazily, on the first usable token.
			if _, known := sub.userMap[user.Id]; !known {
				sub.userMap[user.Id] = map[string]bool{}
			}
			sub.userMap[user.Id][ch] = true
		}
	}
	return sub
}
// OrMerge merges the channel maps with OR semantics, which makes it easy to
// combine several strategies, e.g. routing by a particular tag.
func (s *Subscription) OrMerge(other *Subscription) {
// OrMerge merges the channel maps with OR semantics, which makes it easy to
// combine several strategies, e.g. routing by a particular tag.
func (s *NotifyTarget) OrMerge(other *NotifyTarget) {
s.merge(other, NotifyChannels.OrMerge)
}
// AndMerge merges the bool values of the channel maps with AND semantics, so a notification can be removed for a single user or channel.
// AndMerge merges the bool values of the channel maps with AND semantics, so a notification can be removed for a single user or channel.
// Typical scenarios:
// 1. A person has left and should no longer receive alerts.
// 2. A notification channel is under maintenance and should temporarily not be used.
// 3. On-call redirection logic, e.g. additionally sending high-severity alerts to emergency responders.
// A custom router can be implemented on top of this to fit business needs.
func (s *Subscription) AndMerge(other *Subscription) {
func (s *NotifyTarget) AndMerge(other *NotifyTarget) {
s.merge(other, NotifyChannels.AndMerge)
}
func (s *Subscription) merge(other *Subscription, f func(NotifyChannels, NotifyChannels)) {
func (s *NotifyTarget) merge(other *NotifyTarget, f func(NotifyChannels, NotifyChannels)) {
if other == nil {
return
}
......@@ -110,7 +56,7 @@ func (s *Subscription) merge(other *Subscription, f func(NotifyChannels, NotifyC
}
// ToChannelUserMap userMap(map[uid][channel]bool) 转换为 map[channel][]uid 的结构
func (s *Subscription) ToChannelUserMap() map[string][]int64 {
func (s *NotifyTarget) ToChannelUserMap() map[string][]int64 {
m := make(map[string][]int64)
for uid, nc := range s.userMap {
for ch, send := range nc {
......@@ -122,7 +68,7 @@ func (s *Subscription) ToChannelUserMap() map[string][]int64 {
return m
}
func (s *Subscription) ToCallbackList() []string {
func (s *NotifyTarget) ToCallbackList() []string {
callbacks := make([]string, 0, len(s.callbacks))
for cb := range s.callbacks {
callbacks = append(callbacks, cb)
......@@ -130,10 +76,59 @@ func (s *Subscription) ToCallbackList() []string {
return callbacks
}
// ToWebhookList flattens the webhooks map into a slice. The order follows map
// iteration and is therefore not stable between calls.
func (s *Subscription) ToWebhookList() []config.Webhook {
webhooks := make([]config.Webhook, 0, len(s.webhooks))
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
for _, wh := range s.webhooks {
webhooks = append(webhooks, wh)
}
return webhooks
}
// NotifyTargetDispatch abstracts the routing strategy from an alert event to its receivers.
// rule: the alert rule
// event: the alert event
// prev: the previous routing result; an implementation may modify prev directly, or return a new NotifyTarget to be used with AndMerge/OrMerge
type NotifyTargetDispatch func(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget
// NotifyGroupDispatch handles the group subscriptions of an alert rule.
// It parses event.NotifyGroupsJSON into group ids (entries that are not valid
// base-10 int64 values are ignored), looks the groups up in the user-group
// cache, and returns a new NotifyTarget in which every member of those groups
// is subscribed to the channels listed in event.NotifyChannelsJSON.
// rule and prev are not used.
func NotifyGroupDispatch(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget {
	groupIds := make([]int64, 0, len(event.NotifyGroupsJSON))
	for _, raw := range event.NotifyGroupsJSON {
		if gid, err := strconv.ParseInt(raw, 10, 64); err == nil {
			groupIds = append(groupIds, gid)
		}
	}

	target := NewNotifyTarget()
	for _, group := range dispatch.userGroupCache.GetByUserGroupIds(groupIds) {
		for i := range group.UserIds {
			target.userMap[group.UserIds[i]] = NewNotifyChannels(event.NotifyChannelsJSON)
		}
	}
	return target
}
// GlobalWebhookDispatch returns a new NotifyTarget containing every webhook
// from the webhook cache whose Enable flag is set, keyed by its Url.
// rule, event and prev are not used.
func GlobalWebhookDispatch(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget {
	hooks := dispatch.webhookCache.GetWebhooks()
	target := NewNotifyTarget()
	for _, hook := range hooks {
		if hook.Enable {
			target.webhooks[hook.Url] = hook
		}
	}
	return target
}
// EventCallbacksDispatch records every non-empty callback of the event
// directly in prev.callbacks and always returns nil: it mutates the previous
// routing result instead of producing a new one. prev must be non-nil.
// rule and dispatch are not used.
func EventCallbacksDispatch(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget {
	for i := range event.CallbacksJSON {
		if cb := event.CallbacksJSON[i]; cb != "" {
			prev.callbacks[cb] = struct{}{}
		}
	}
	return nil
}
package eval
import (
"context"
"fmt"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/prom"
)
// Scheduler keeps the set of running alert-rule workers and the registry of
// external processors in line with the alert-rule cache (see syncAlertRules).
type Scheduler struct {
// key: hash
// Running workers, keyed by AlertRuleWorker.Hash().
alertRules map[string]*AlertRuleWorker
// Processors for rules handled as "external" rules, guarded by their own lock.
ExternalProcessors *process.ExternalProcessorsType
// Alert configuration; EngineDelay and Heartbeat.Endpoint are read from it.
aconf aconf.Alert
// Caches and clients below are handed to every Processor / worker created.
alertRuleCache *memsto.AlertRuleCacheType
targetCache *memsto.TargetCacheType
busiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
promClients *prom.PromClientMap
// Used to decide whether this instance is the leader (host rules).
naming *naming.Naming
ctx *ctx.Context
stats *astats.Stats
}
// NewScheduler wires a Scheduler from its dependencies, starts the periodic
// rule synchronisation (LoopSyncRules) in a background goroutine bound to
// context.Background(), and returns the scheduler.
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
	busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, promClients *prom.PromClientMap, naming *naming.Naming,
	ctx *ctx.Context, stats *astats.Stats) *Scheduler {
	s := new(Scheduler)
	s.aconf = aconf
	s.alertRules = make(map[string]*AlertRuleWorker)
	s.ExternalProcessors = externalProcessors

	// Shared caches and clients.
	s.alertRuleCache = arc
	s.targetCache = targetCache
	s.busiGroupCache = busiGroupCache
	s.alertMuteCache = alertMuteCache
	s.promClients = promClients

	s.naming = naming
	s.ctx = ctx
	s.stats = stats

	go s.LoopSyncRules(context.Background())
	return s
}
// LoopSyncRules waits aconf.EngineDelay seconds and then calls syncAlertRules
// every 9 seconds until ctx is cancelled.
//
// Both the startup delay and the wait between syncs observe ctx, so a
// cancellation during the initial delay returns immediately instead of being
// noticed only after the delay has elapsed.
func (s *Scheduler) LoopSyncRules(ctx context.Context) {
	startupDelay := time.Duration(s.aconf.EngineDelay) * time.Second
	select {
	case <-ctx.Done():
		return
	case <-time.After(startupDelay):
	}

	const syncInterval = 9000 * time.Millisecond
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(syncInterval):
			s.syncAlertRules()
		}
	}
}
// syncAlertRules reconciles the running workers and external processors with
// the current content of the alert-rule cache:
//
//   - Prometheus rules get one worker per datasource for which this instance is
//     hit on the datasource hash ring.
//   - Host rules get a single worker (datasource id 0) when this instance is
//     the leader.
//   - Every other rule (including host rules on a non-leader, which fall
//     through as before) is registered as an external processor per datasource.
//
// Workers that are new are prepared and started; workers whose hash no longer
// appears are stopped and removed. External processors are replaced when new
// or when their hash changed, and dropped when no longer wanted.
func (s *Scheduler) syncAlertRules() {
	ids := s.alertRuleCache.GetRuleIds()
	alertRuleWorkers := make(map[string]*AlertRuleWorker)
	externalRuleWorkers := make(map[string]*process.Processor)

	// Leadership is resolved lazily and at most once per sync. IamLeader looks
	// up the list of active servers on every call, so asking once per host
	// rule repeated the same lookup for no benefit.
	var leaderKnown, leader bool
	iamLeader := func() bool {
		if !leaderKnown {
			leader, leaderKnown = s.naming.IamLeader(), true
		}
		return leader
	}

	for _, id := range ids {
		rule := s.alertRuleCache.Get(id)
		if rule == nil {
			continue
		}

		if rule.IsPrometheusRule() {
			datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
			for _, dsId := range datasourceIds {
				if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
					continue
				}

				processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.promClients, s.ctx, s.stats)
				alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.ctx)
				alertRuleWorkers[alertRule.Hash()] = alertRule
			}
		} else if rule.IsHostRule() && iamLeader() {
			// all host rule will be processed by leader
			processor := process.NewProcessor(rule, 0, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.promClients, s.ctx, s.stats)
			alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.ctx)
			alertRuleWorkers[alertRule.Hash()] = alertRule
		} else {
			// if rule is not processed by prometheus engine, create it as externalRule
			for _, dsId := range rule.DatasourceIdsJson {
				processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.promClients, s.ctx, s.stats)
				externalRuleWorkers[processor.Key()] = processor
			}
		}
	}

	// Start workers that are wanted but not yet running.
	for hash, rule := range alertRuleWorkers {
		if _, has := s.alertRules[hash]; !has {
			rule.Prepare()
			rule.Start()
			s.alertRules[hash] = rule
		}
	}

	// Stop workers that are running but no longer wanted.
	for hash, rule := range s.alertRules {
		if _, has := alertRuleWorkers[hash]; !has {
			rule.Stop()
			delete(s.alertRules, hash)
		}
	}

	// The external section is the last thing this function does, so the lock
	// is released with defer: a panic while recovering events can no longer
	// leave ExternalLock held forever.
	s.ExternalProcessors.ExternalLock.Lock()
	defer s.ExternalProcessors.ExternalLock.Unlock()

	for key, processor := range externalRuleWorkers {
		if curProcessor, has := s.ExternalProcessors.Processors[key]; has {
			// The rule exists and its hash is identical: treat it as unchanged.
			// A hash function covering more related data can be implemented
			// separately if needed.
			if processor.Hash() == curProcessor.Hash() {
				continue
			}
		}

		// The rule is either new or its hash differs: trigger a rule update.
		processor.RecoverAlertCurEventFromDb()
		s.ExternalProcessors.Processors[key] = processor
	}

	for key := range s.ExternalProcessors.Processors {
		if _, has := externalRuleWorkers[key]; !has {
			delete(s.ExternalProcessors.Processors, key)
		}
	}
}
package eval
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
// AlertRuleWorker periodically evaluates one alert rule against one datasource
// and hands the resulting anomaly points to its Processor.
type AlertRuleWorker struct {
// Datasource this worker queries.
datasourceId int64
// Closed by Stop to end the evaluation goroutine started by Start.
quit chan struct{}
// Copied from the parsed rule config on each evaluation and passed to Processor.Handle.
inhibit bool
// Written during evaluation; not read anywhere in this file.
severity int
rule *models.AlertRule
processor *process.Processor
promClients *prom.PromClientMap
ctx *ctx.Context
}
// NewAlertRuleWorker creates a worker for rule on datasourceId with a fresh
// quit channel. The worker does nothing until Start is called.
func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, processor *process.Processor, promClients *prom.PromClientMap, ctx *ctx.Context) *AlertRuleWorker {
	return &AlertRuleWorker{
		rule:         rule,
		datasourceId: datasourceId,
		processor:    processor,
		promClients:  promClients,
		ctx:          ctx,
		quit:         make(chan struct{}),
	}
}
// Key returns common.RuleKey for this worker's datasource id and rule id.
// The worker's rule must be non-nil.
func (arw *AlertRuleWorker) Key() string {
	ruleId := arw.rule.Id
	return common.RuleKey(arw.datasourceId, ruleId)
}
// Hash returns the MD5 of "<rule id>_<eval interval>_<rule config>_<datasource id>".
// The scheduler uses it to decide whether a running worker is still current.
func (arw *AlertRuleWorker) Hash() string {
	rule := arw.rule
	fingerprint := fmt.Sprintf("%d_%d_%s_%d", rule.Id, rule.PromEvalInterval, rule.RuleConfig, arw.datasourceId)
	return str.MD5(fingerprint)
}
// Prepare asks the processor to recover its current alert events from the
// database. The scheduler calls it once before Start.
func (arw *AlertRuleWorker) Prepare() {
arw.processor.RecoverAlertCurEventFromDb()
}
// Start launches the evaluation goroutine: it calls Eval immediately and then
// once every rule.PromEvalInterval seconds (10 seconds when the interval is
// not positive) until Stop closes the quit channel.
//
// The wait between evaluations selects on the quit channel, so Stop ends the
// goroutine promptly instead of only after the current sleep has elapsed.
func (arw *AlertRuleWorker) Start() {
	logger.Infof("eval:%s started", arw.Key())
	interval := arw.rule.PromEvalInterval
	if interval <= 0 {
		interval = 10
	}
	wait := time.Duration(interval) * time.Second

	go func() {
		for {
			// Do not evaluate at all if the worker was stopped already.
			select {
			case <-arw.quit:
				return
			default:
			}

			arw.Eval()

			timer := time.NewTimer(wait)
			select {
			case <-arw.quit:
				timer.Stop()
				return
			case <-timer.C:
			}
		}
	}()
}
// Eval runs one evaluation of the worker's rule. Prometheus and host rules
// are turned into anomaly points by the matching Get*AnomalyPoint method
// (which also refreshes arw.inhibit); any other rule type is ignored. The
// points are then passed to the processor with source "inner".
func (arw *AlertRuleWorker) Eval() {
	rule := arw.rule
	if rule == nil {
		logger.Errorf("rule_eval:%s rule not found", arw.Key())
		return
	}

	var points []common.AnomalyPoint
	if typ := rule.GetRuleType(); typ == models.PROMETHEUS {
		points = arw.GetPromAnomalyPoint(rule.RuleConfig)
	} else if typ == models.HOST {
		points = arw.GetHostAnomalyPoint(rule.RuleConfig)
	} else {
		return
	}

	// arw.inhibit must be read after the points were computed: it is set there.
	arw.processor.Handle(points, "inner", arw.inhibit)
}
// Stop logs the shutdown and closes the quit channel, which ends the goroutine
// started by Start. Because it closes a channel, calling Stop twice on the
// same worker panics.
func (arw *AlertRuleWorker) Stop() {
logger.Infof("%s stopped", arw.Key())
close(arw.quit)
}
// GetPromAnomalyPoint parses ruleConfig as a models.PromRuleConfig, runs every
// non-blank PromQL query of the rule against this worker's datasource at the
// current time, and returns the resulting anomaly points, each tagged with the
// severity of the query that produced it.
//
// A config that cannot be parsed (or is JSON null) yields no points. A query
// is skipped, with an error log, when its PromQL is blank, the datasource
// client is missing, the query fails, or the query returns warnings.
//
// Side effects: arw.inhibit is set from the config and arw.severity is set to
// the smallest Severity among the rule's queries. The previous code compared
// against a local that always stayed 0, so arw.severity was never populated
// for non-negative severities.
func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) []common.AnomalyPoint {
	var lst []common.AnomalyPoint

	var rule *models.PromRuleConfig
	if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
		logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
		return lst
	}

	if rule == nil {
		logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
		return lst
	}

	arw.inhibit = rule.Inhibit

	// minSeverity tracks the smallest Severity seen so far; seen tells the
	// first query apart so that no sentinel value is needed.
	minSeverity, seen := 0, false
	for _, query := range rule.Queries {
		if !seen || query.Severity < minSeverity {
			minSeverity, seen = query.Severity, true
			arw.severity = minSeverity
		}

		promql := strings.TrimSpace(query.PromQl)
		if promql == "" {
			logger.Errorf("rule_eval:%s promql is blank", arw.Key())
			continue
		}

		if arw.promClients.IsNil(arw.datasourceId) {
			logger.Errorf("rule_eval:%s error reader client is nil", arw.Key())
			continue
		}

		readerClient := arw.promClients.GetCli(arw.datasourceId)

		var warnings promsdk.Warnings
		value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
		if err != nil {
			logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
			continue
		}

		// A result that comes with warnings is discarded entirely.
		if len(warnings) > 0 {
			logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
			continue
		}

		logger.Debugf("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
		points := common.ConvertAnomalyPoints(value)
		for i := 0; i < len(points); i++ {
			points[i].Severity = query.Severity
		}
		lst = append(lst, points...)
	}
	return lst
}
// GetHostAnomalyPoint parses ruleConfig as a models.HostRuleConfig and
// evaluates each of its triggers against the targets selected by the rule's
// queries:
//
//   - "target_miss" / "offset": one anomaly point per target returned by
//     models.TargetGetsByFilter. The threshold passed is now-Duration for
//     "target_miss" and Duration itself for "offset".
//   - "pct_target_miss": a single point when the percentage of matching
//     targets counted as missing is at least trigger.Percent.
//
// A config that cannot be parsed (or is JSON null) yields no points; a trigger
// whose lookup fails is logged and skipped.
//
// Side effects: arw.inhibit is set from the config and arw.severity is set to
// the smallest Severity among the triggers. The previous code compared
// against a local that always stayed 0, so arw.severity was never populated
// for non-negative severities.
func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.AnomalyPoint {
	var lst []common.AnomalyPoint

	var rule *models.HostRuleConfig
	if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
		logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
		return lst
	}

	if rule == nil {
		logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
		return lst
	}

	arw.inhibit = rule.Inhibit
	now := time.Now().Unix()

	// minSeverity tracks the smallest Severity seen so far; seen tells the
	// first trigger apart so that no sentinel value is needed.
	minSeverity, seen := 0, false
	for _, trigger := range rule.Triggers {
		if !seen || trigger.Severity < minSeverity {
			minSeverity, seen = trigger.Severity, true
			arw.severity = minSeverity
		}

		query := models.GetHostsQuery(rule.Queries)
		switch trigger.Type {
		case "target_miss", "offset":
			t := now - int64(trigger.Duration)
			if trigger.Type == "offset" {
				t = int64(trigger.Duration)
			}

			hosts, err := models.TargetGetsByFilter(arw.ctx, query, trigger.Type, t, 0, 0)
			if err != nil {
				logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
				continue
			}
			for _, host := range hosts {
				m := make(map[string]string)
				m["ident"] = host.Ident
				lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(t), trigger.Severity))
			}
		case "pct_target_miss":
			allCount, err := models.TargetCountByFilter(arw.ctx, query, "", 0)
			if err != nil {
				logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
				continue
			}

			// No matching targets: there is no percentage to compute. This
			// avoids a 0/0 division whose NaN result only happened to compare
			// as "not alerting".
			if allCount == 0 {
				continue
			}

			missCount, err := models.TargetCountByFilter(arw.ctx, query, trigger.Type, now-int64(trigger.Duration))
			if err != nil {
				logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
				continue
			}

			pct := float64(missCount) / float64(allCount) * 100
			if pct >= float64(trigger.Percent) {
				lst = append(lst, common.NewAnomalyPoint(trigger.Type, nil, now, pct, trigger.Severity))
			}
		}
	}
	return lst
}
package engine
package mute
import (
"strconv"
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/toolkits/pkg/logger"
)
// MuteStrategyFunc reports whether event should be muted for rule.
type MuteStrategyFunc func(rule *models.AlertRule, event *models.AlertCurEvent) bool
// IsMuted reports whether the event is muted. The strategies are tried in
// order (time not effective, ident not exists, busi-group mismatch, event
// mute) and the first one that matches makes the result true.
// NOTE(review): this span interleaves the old table-driven version with the
// new version that takes the caches as parameters.
func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, alertMuteCache *memsto.AlertMuteCacheType) bool {
if TimeNonEffectiveMuteStrategy(rule, event) {
return true
}
var AlertMuteStrategies = []MuteStrategyFunc{
TimeNonEffectiveMuteStrategy,
IdentNotExistsMuteStrategy,
BgNotMatchMuteStrategy,
EventMuteStrategy,
}
if IdentNotExistsMuteStrategy(rule, event, targetCache) {
return true
}
func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool {
for _, strategyFunc := range AlertMuteStrategies {
if strategyFunc(rule, event) {
return true
}
if BgNotMatchMuteStrategy(rule, event, targetCache) {
return true
}
if EventMuteStrategy(event, alertMuteCache) {
return true
}
return false
}
......@@ -65,12 +68,12 @@ func TimeNonEffectiveMuteStrategy(rule *models.AlertRule, event *models.AlertCur
}
// IdentNotExistsMuteStrategy 根据ident是否存在过滤,如果ident不存在,则target_up的告警直接过滤掉
func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType) bool {
ident, has := event.TagsMap["ident"]
if !has {
return false
}
_, exists := memsto.TargetCache.Get(ident)
_, exists := targetCache.Get(ident)
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
// 这里的判断有点太粗暴了,但是目前没有更好的办法
if !exists && strings.Contains(rule.PromQl, "target_up") {
......@@ -81,7 +84,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
}
// BgNotMatchMuteStrategy 当规则开启只在bg内部告警时,对于非bg内部的机器过滤
func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType) bool {
// 没有开启BG内部告警,直接不过滤
if rule.EnableInBG == 0 {
return false
......@@ -92,7 +95,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent)
return false
}
target, exists := memsto.TargetCache.Get(ident)
target, exists := targetCache.Get(ident)
// 对于包含ident的告警事件,check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效,那其他BG的机器就不能因此规则产生告警
if exists && target.GroupId != rule.GroupId {
......@@ -102,8 +105,8 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent)
return false
}
func EventMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
mutes, has := memsto.AlertMuteCache.Gets(event.GroupId)
func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.AlertMuteCacheType) bool {
mutes, has := alertMuteCache.Gets(event.GroupId)
if !has || len(mutes) == 0 {
return false
}
......@@ -122,64 +125,54 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
if mute.Disabled == 1 {
return false
}
ts := event.TriggerTime
if len(clock) > 0 {
ts = clock[0]
}
// 如果不是全局的,判断 cluster
if mute.Cluster != models.ClusterAll {
// mute.Cluster 是一个字符串,可能是多个cluster的组合,比如"cluster1 cluster2"
clusters := strings.Fields(mute.Cluster)
cm := make(map[string]struct{}, len(clusters))
for i := 0; i < len(clusters); i++ {
cm[clusters[i]] = struct{}{}
// 如果不是全局的,判断 匹配的 datasource id
if !(len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] == 0) && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
idm[mute.DatasourceIdsJson[i]] = struct{}{}
}
// 判断event.Cluster是否包含在cm
if _, has := cm[event.Cluster]; !has {
// 判断 event.datasourceId 是否包含在 idm
if _, has := idm[event.DatasourceId]; !has {
return false
}
}
if ts < mute.Btime || ts > mute.Etime {
return false
}
return matchTags(event.TagsMap, mute.ITags)
}
// matchTag reports whether value satisfies filter. Supported operators are
// "==", "!=", "in", "not in", "=~" and "!~"; any other operator never matches.
// The set operators use filter.Vset and the regex operators use filter.Regexp,
// which must be populated for those operators.
func matchTag(value string, filter models.TagFilter) bool {
	switch op := filter.Func; op {
	case "==", "!=":
		equal := filter.Value == value
		return equal == (op == "==")
	case "in", "not in":
		_, member := filter.Vset[value]
		return member == (op == "in")
	case "=~", "!~":
		matched := filter.Regexp.MatchString(value)
		return matched == (op == "=~")
	default:
		// unexpected func
		return false
	}
}
func matchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
for _, filter := range itags {
value, has := eventTagsMap[filter.Key]
if !has {
var matchTime bool
if mute.MuteTimeType == models.TimeRange {
if ts < mute.Btime || ts > mute.Etime {
return false
}
if !matchTag(value, filter) {
return false
matchTime = true
} else if mute.MuteTimeType == models.Periodic {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
for i := 0; i < len(mute.PeriodicMutesJson); i++ {
if strings.Contains(mute.PeriodicMutesJson[i].EnableDaysOfWeek, triggerWeek) {
if mute.PeriodicMutesJson[i].EnableStime <= mute.PeriodicMutesJson[i].EnableEtime {
if triggerTime >= mute.PeriodicMutesJson[i].EnableStime && triggerTime < mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
}
} else {
if triggerTime < mute.PeriodicMutesJson[i].EnableStime || triggerTime >= mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
}
}
}
}
}
return true
if !matchTime {
return false
}
return common.MatchTags(event.TagsMap, mute.ITags)
}
......@@ -9,13 +9,13 @@ import (
// NodeReplicas is the replica count used when building a consistent hash ring.
const NodeReplicas = 500
// Hash-ring registry: one consistent hash ring per key, guarded by the
// embedded RWMutex.
type ClusterHashRingType struct {
type DatasourceHashRingType struct {
sync.RWMutex
Rings map[string]*consistent.Consistent
Rings map[int64]*consistent.Consistent
}
// for alert_rule sharding
var ClusterHashRing = ClusterHashRingType{Rings: make(map[string]*consistent.Consistent)}
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[int64]*consistent.Consistent)}
func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consistent {
ret := consistent.New()
......@@ -26,39 +26,39 @@ func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consisten
return ret
}
// RebuildConsistentHashRing builds a new ring with NodeReplicas replicas from
// nodes, installs it in the global registry under the given key, and logs the
// resulting members.
func RebuildConsistentHashRing(cluster string, nodes []string) {
func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
r := consistent.New()
r.NumberOfReplicas = NodeReplicas
for i := 0; i < len(nodes); i++ {
r.Add(nodes[i])
}
ClusterHashRing.Set(cluster, r)
logger.Infof("hash ring %s rebuild %+v", cluster, r.Members())
DatasourceHashRing.Set(datasourceId, r)
logger.Infof("hash ring %d rebuild %+v", datasourceId, r.Members())
}
func (chr *ClusterHashRingType) GetNode(cluster, pk string) (string, error) {
func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (string, error) {
chr.RLock()
defer chr.RUnlock()
_, exists := chr.Rings[cluster]
_, exists := chr.Rings[datasourceId]
if !exists {
chr.Rings[cluster] = NewConsistentHashRing(int32(NodeReplicas), []string{})
chr.Rings[datasourceId] = NewConsistentHashRing(int32(NodeReplicas), []string{})
}
return chr.Rings[cluster].Get(pk)
return chr.Rings[datasourceId].Get(pk)
}
// IsHit reports whether currentNode is the node that GetNode returns for pk.
// When the node cannot be determined the failure is logged at debug level and
// the result is false.
func (chr *ClusterHashRingType) IsHit(cluster string, pk string, currentNode string) bool {
node, err := chr.GetNode(cluster, pk)
func (chr *DatasourceHashRingType) IsHit(datasourceId int64, pk string, currentNode string) bool {
node, err := chr.GetNode(datasourceId, pk)
if err != nil {
logger.Debugf("cluster:%s pk:%s failed to get node from hashring:%v", cluster, pk, err)
logger.Debugf("datasource id:%d pk:%s failed to get node from hashring:%v", datasourceId, pk, err)
return false
}
return node == currentNode
}
func (chr *ClusterHashRingType) Set(cluster string, r *consistent.Consistent) {
func (chr *DatasourceHashRingType) Set(datasourceId int64, r *consistent.Consistent) {
chr.RLock()
defer chr.RUnlock()
chr.Rings[cluster] = r
chr.Rings[datasourceId] = r
}
package naming
import (
"fmt"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
// Naming registers this alert engine instance via heartbeats and answers
// questions about the set of active instances.
type Naming struct {
ctx *ctx.Context
// Heartbeat configuration; Endpoint, ClusterName and Interval are read from it.
Heartbeat aconf.HeartbeatConfig
}
// NewNaming creates a Naming and immediately calls Heartbeats on it. The error
// returned by Heartbeats is not propagated; the Naming is returned either way.
func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig) *Naming {
	n := new(Naming)
	n.ctx = ctx
	n.Heartbeat = heartbeat
	_ = n.Heartbeats()
	return n
}
// local servers
// localss remembers, per datasource id, the space-joined sorted list of active
// servers seen by the last heartbeat, so the hash ring is only rebuilt when
// that list changes.
var localss map[int64]string
// Heartbeats resets the local server snapshot and sends one heartbeat
// synchronously. On success it starts the periodic heartbeat loop and the
// inactive-instance cleanup loop in the background and returns nil. On failure
// it prints the error, starts nothing, and returns the error.
func (n *Naming) Heartbeats() error {
	localss = make(map[int64]string)

	err := n.heartbeat()
	if err == nil {
		go n.loopHeartbeat()
		go n.loopDeleteInactiveInstances()
		return nil
	}

	fmt.Println("failed to heartbeat:", err)
	return err
}
// loopDeleteInactiveInstances sleeps ten minutes and then removes inactive
// instances, forever. It never returns.
func (n *Naming) loopDeleteInactiveInstances() {
	const pause = 10 * time.Minute
	for {
		time.Sleep(pause)
		n.DeleteInactiveInstances()
	}
}
// DeleteInactiveInstances deletes the AlertingEngines rows whose clock is more
// than 600 seconds in the past. A failure is logged, not returned.
func (n *Naming) DeleteInactiveInstances() {
	cutoff := time.Now().Unix() - 600
	if err := models.DB(n.ctx).Where("clock < ?", cutoff).Delete(new(models.AlertingEngines)).Error; err != nil {
		logger.Errorf("delete inactive instances err:%v", err)
	}
}
// loopHeartbeat sends a heartbeat every Heartbeat.Interval milliseconds,
// forever. A failed heartbeat is logged as a warning and the loop carries on.
func (n *Naming) loopHeartbeat() {
	pause := time.Duration(n.Heartbeat.Interval) * time.Millisecond
	for {
		time.Sleep(pause)
		err := n.heartbeat()
		if err == nil {
			continue
		}
		logger.Warning(err)
	}
}
// heartbeat records that this instance is alive and refreshes the hash rings.
//
// It resolves the datasource ids bound to Heartbeat.ClusterName (that binding
// is maintained on the web page), writes one heartbeat per datasource id (or a
// single heartbeat with datasource id 0 when there are none), and then, for
// each datasource, rebuilds the consistent hash ring if the list of active
// servers changed since the last heartbeat.
//
// Only a failure to resolve the datasource ids is returned. Failures of
// individual heartbeat writes or server lookups are logged as warnings. Those
// warnings now name the actual cluster and datasource id; before, they printed
// an empty cluster name or labelled the datasource id as "cluster".
func (n *Naming) heartbeat() error {
	var datasourceIds []int64
	var err error

	// The mapping between instances and clusters is maintained on the web page.
	datasourceIds, err = models.GetDatasourceIdsByClusterName(n.ctx, n.Heartbeat.ClusterName)
	if err != nil {
		return err
	}

	if len(datasourceIds) == 0 {
		err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.Heartbeat.Endpoint, n.Heartbeat.ClusterName, 0)
		if err != nil {
			logger.Warningf("heartbeat with cluster %s err:%v", n.Heartbeat.ClusterName, err)
		}
		// No datasources means there are no hash rings to refresh.
		return nil
	}

	for i := 0; i < len(datasourceIds); i++ {
		err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.Heartbeat.Endpoint, n.Heartbeat.ClusterName, datasourceIds[i])
		if err != nil {
			logger.Warningf("heartbeat with cluster %s datasource %d err:%v", n.Heartbeat.ClusterName, datasourceIds[i], err)
		}
	}

	for i := 0; i < len(datasourceIds); i++ {
		servers, err := n.ActiveServers(datasourceIds[i])
		if err != nil {
			logger.Warningf("heartbeat datasource %d get active server err:%v", datasourceIds[i], err)
			continue
		}

		// Compare a canonical (sorted, joined) form with the last snapshot so
		// the ring is rebuilt only when membership actually changed.
		sort.Strings(servers)
		newss := strings.Join(servers, " ")

		oldss, exists := localss[datasourceIds[i]]
		if exists && oldss == newss {
			continue
		}

		RebuildConsistentHashRing(datasourceIds[i], servers)
		localss[datasourceIds[i]] = newss
	}

	return nil
}
// ActiveServers returns the instances of datasourceId that are considered
// alive, i.e. that sent a heartbeat within the last 30 seconds. A datasource
// id of -1 is rejected with an error.
func (n *Naming) ActiveServers(datasourceId int64) ([]string, error) {
	if datasourceId == -1 {
		return nil, fmt.Errorf("cluster is empty")
	}

	aliveSince := time.Now().Unix() - 30
	return models.AlertingEngineGetsInstances(n.ctx, "datasource_id = ? and clock > ?", datasourceId, aliveSince)
}
// AllActiveServers returns every instance, regardless of datasource, that is
// considered alive, i.e. that sent a heartbeat within the last 30 seconds.
func (n *Naming) AllActiveServers() ([]string, error) {
	aliveSince := time.Now().Unix() - 30
	return models.AlertingEngineGetsInstances(n.ctx, "clock > ?", aliveSince)
}
......@@ -3,23 +3,22 @@ package naming
import (
"sort"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/toolkits/pkg/logger"
)
// IamLeader reports whether this instance is the leader: the leader is the
// active server whose endpoint sorts first. When the active servers cannot be
// fetched, or there are none, the problem is logged and the instance is not
// the leader.
func IamLeader(cluster string) (bool, error) {
servers, err := ActiveServers(cluster)
func (n *Naming) IamLeader() bool {
servers, err := n.AllActiveServers()
if err != nil {
logger.Errorf("failed to get active servers: %v", err)
return false, err
return false
}
if len(servers) == 0 {
logger.Errorf("active servers empty")
return false, err
return false
}
sort.Strings(servers)
return config.C.Heartbeat.Endpoint == servers[0], nil
return n.Heartbeat.Endpoint == servers[0]
}
package engine
package process
import (
"fmt"
"sort"
"strings"
"sync"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/ccfos/nightingale/v6/models"
)
type AlertCurEventMap struct {
......@@ -18,6 +11,17 @@ type AlertCurEventMap struct {
Data map[string]*models.AlertCurEvent
}
// NewAlertCurEventMap wraps data in an AlertCurEventMap. The map is used
// as-is (not copied); a nil data is replaced by a fresh empty map.
func NewAlertCurEventMap(data map[string]*models.AlertCurEvent) *AlertCurEventMap {
	events := data
	if events == nil {
		events = make(map[string]*models.AlertCurEvent)
	}
	return &AlertCurEventMap{Data: events}
}
func (a *AlertCurEventMap) SetAll(data map[string]*models.AlertCurEvent) {
a.Lock()
defer a.Unlock()
......@@ -68,122 +72,3 @@ func (a *AlertCurEventMap) GetAll() map[string]*models.AlertCurEvent {
defer a.RUnlock()
return a.Data
}
// NewAlertCurEventMap wraps data in an AlertCurEventMap. The map is used
// as-is (not copied); a nil data is replaced by a fresh empty map.
func NewAlertCurEventMap(data map[string]*models.AlertCurEvent) *AlertCurEventMap {
	events := data
	if events == nil {
		events = make(map[string]*models.AlertCurEvent)
	}
	return &AlertCurEventMap{Data: events}
}
// AlertVector carries the alerting context of a single alert event.
type AlertVector struct {
Ctx *AlertRuleContext
Rule *models.AlertRule
Vector conv.Vector
// Source of the vector; "inner" makes BuildEvent use the evaluation time as LastEvalTime.
From string
// Derived by fillTags: the series labels plus the rule's append tags and "rulename".
tagsMap map[string]string
// tagsMap rendered as sorted "key=value" strings.
tagsArr []string
// Filled by mayHandleIdent from the target cache when the tags contain "ident".
target string
targetNote string
// Filled by mayHandleGroup from the busi-group cache.
groupName string
}
// NewAlertVector builds an AlertVector for vector and derives its tags, target
// information and group name. When rule is nil the rule held by ctx is used.
func NewAlertVector(ctx *AlertRuleContext, rule *models.AlertRule, vector conv.Vector, from string) *AlertVector {
	effective := rule
	if effective == nil {
		effective = ctx.rule
	}

	av := new(AlertVector)
	av.Ctx = ctx
	av.Rule = effective
	av.Vector = vector
	av.From = from

	av.fillTags()
	av.mayHandleIdent()
	av.mayHandleGroup()
	return av
}
// Hash returns the MD5 of "<rule id>_<vector key>_<cluster>", which identifies
// the event produced from this vector.
func (av *AlertVector) Hash() string {
	identity := fmt.Sprintf("%d_%s_%s", av.Rule.Id, av.Vector.Key, av.Ctx.cluster)
	return str.MD5(identity)
}
// fillTags computes av.tagsMap and av.tagsArr. The map starts from the series
// labels of the vector, is then overlaid with the rule's append tags
// ("key=value" strings, split at the first "="), and finally gets a
// "rulename" entry with the rule's name. tagsArr is the sorted "key=value"
// rendering of that map.
//
// An append tag without "=" is ignored. Indexing the split result
// unconditionally used to panic with an index-out-of-range on such a tag.
func (av *AlertVector) fillTags() {
	// handle series tags
	tagsMap := make(map[string]string)
	for label, value := range av.Vector.Labels {
		tagsMap[string(label)] = string(value)
	}

	// handle rule tags
	for _, tag := range av.Rule.AppendTagsJSON {
		arr := strings.SplitN(tag, "=", 2)
		if len(arr) != 2 {
			// Malformed tag (no "="): there is no value to assign, skip it.
			continue
		}
		tagsMap[arr[0]] = arr[1]
	}

	tagsMap["rulename"] = av.Rule.Name
	av.tagsMap = tagsMap

	// handle tagsArr
	av.tagsArr = labelMapToArr(tagsMap)
}
// mayHandleIdent fills av.target and av.targetNote from the target cache when
// the tags contain an "ident" that the cache knows; otherwise it does nothing.
func (av *AlertVector) mayHandleIdent() {
	ident, tagged := av.tagsMap["ident"]
	if !tagged {
		return
	}

	target, found := memsto.TargetCache.Get(ident)
	if !found {
		return
	}

	av.target = target.Ident
	av.targetNote = target.Note
}
// mayHandleGroup sets av.groupName to the name of the rule's busi group when
// the busi-group cache has an entry for it; otherwise it does nothing.
func (av *AlertVector) mayHandleGroup() {
	if group := memsto.BusiGroupCache.GetByBusiGroupId(av.Rule.GroupId); group != nil {
		av.groupName = group.Name
	}
}
// BuildEvent creates a new, non-recovered AlertCurEvent from the rule and
// fills it with the vector's timestamp and value and the derived tags, target
// and group. LastEvalTime is now for vectors from "inner" and the trigger time
// for every other source.
func (av *AlertVector) BuildEvent(now int64) *models.AlertCurEvent {
	event := av.Rule.GenerateNewEvent()

	// Identity and origin.
	event.Hash = av.Hash()
	event.Cluster = av.Ctx.cluster
	event.GroupName = av.groupName
	event.TargetIdent = av.target
	event.TargetNote = av.targetNote

	// Tags in their three representations.
	event.TagsMap = av.tagsMap
	event.TagsJSON = av.tagsArr
	event.Tags = strings.Join(av.tagsArr, ",,")

	// Trigger data.
	event.TriggerTime = av.Vector.Timestamp
	event.TriggerValue = av.Vector.ReadableValue()
	event.IsRecovered = false

	event.LastEvalTime = event.TriggerTime
	if av.From == "inner" {
		event.LastEvalTime = now
	}
	return event
}
func labelMapToArr(m map[string]string) []string {
numLabels := len(m)
labelStrings := make([]string, 0, numLabels)
for label, value := range m {
labelStrings = append(labelStrings, fmt.Sprintf("%s=%s", label, value))
}
if numLabels > 1 {
sort.Strings(labelStrings)
}
return labelStrings
}
package process
import (
"bytes"
"fmt"
"html/template"
"sort"
"strings"
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
// ExternalProcessorsType is a registry of processors keyed by
// common.RuleKey(datasourceId, ruleId). ExternalLock must be held while
// Processors is read or modified.
type ExternalProcessorsType struct {
ExternalLock sync.RWMutex
Processors map[string]*Processor
}
// ExternalProcessors is a package-level registry. It is declared as a zero
// value, so its Processors map is nil until assigned; NewExternalProcessors
// returns a ready-to-use registry instead.
var ExternalProcessors ExternalProcessorsType
// NewExternalProcessors returns an empty registry whose Processors map is
// already allocated.
func NewExternalProcessors() *ExternalProcessorsType {
	registry := new(ExternalProcessorsType)
	registry.Processors = map[string]*Processor{}
	return registry
}
// GetExternalAlertRule looks up the processor registered for the given
// datasource id and rule id. The boolean reports whether one was found. The
// lookup runs under the read lock.
func (e *ExternalProcessorsType) GetExternalAlertRule(datasourceId, id int64) (*Processor, bool) {
	key := common.RuleKey(datasourceId, id)

	e.ExternalLock.RLock()
	found, ok := e.Processors[key]
	e.ExternalLock.RUnlock()

	return found, ok
}
// Processor turns the anomaly points of one rule on one datasource into alert
// events and tracks which events are currently firing or pending.
type Processor struct {
datasourceId int64
quit chan struct{}
// Refreshed from the alert-rule cache at the start of every Handle call.
rule *models.AlertRule
// Currently firing events and events still pending, keyed by event hash.
// NOTE(review): NewProcessor does not allocate these; presumably
// RecoverAlertCurEventFromDb does - confirm.
fires *AlertCurEventMap
pendings *AlertCurEventMap
// Set from the argument of the latest Handle call.
inhibit bool
// Scratch state filled per anomaly point while building an event.
tagsMap map[string]string
tagsArr []string
target string
targetNote string
groupName string
// Note: the field name is spelled "atert" (sic) in the code.
atertRuleCache *memsto.AlertRuleCacheType
targetCache *memsto.TargetCacheType
busiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
promClients *prom.PromClientMap
ctx *ctx.Context
stats *astats.Stats
}
// Key returns common.RuleKey for this processor's datasource id and rule id.
// The processor's rule must be non-nil.
func (arw *Processor) Key() string {
	ruleId := arw.rule.Id
	return common.RuleKey(arw.datasourceId, ruleId)
}
// Hash returns the MD5 of "<rule id>_<eval interval>_<rule config>_<datasource id>".
// The scheduler compares it to detect a changed external rule.
func (arw *Processor) Hash() string {
	rule := arw.rule
	fingerprint := fmt.Sprintf("%d_%d_%s_%d", rule.Id, rule.PromEvalInterval, rule.RuleConfig, arw.datasourceId)
	return str.MD5(fingerprint)
}
// NewProcessor creates a Processor for rule on datasourceId, wires in the
// shared caches and clients, and resolves the group name through
// mayHandleGroup. The fires and pendings maps are not allocated here.
func NewProcessor(rule *models.AlertRule, datasourceId int64, atertRuleCache *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
	busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, promClients *prom.PromClientMap, ctx *ctx.Context,
	stats *astats.Stats) *Processor {
	p := new(Processor)
	p.datasourceId = datasourceId
	p.rule = rule
	p.quit = make(chan struct{})

	// Shared caches and clients.
	p.atertRuleCache = atertRuleCache
	p.targetCache = targetCache
	p.busiGroupCache = busiGroupCache
	p.alertMuteCache = alertMuteCache
	p.promClients = promClients

	p.ctx = ctx
	p.stats = stats

	p.mayHandleGroup()
	return p
}
// Handle processes one batch of anomaly points for this processor's rule.
//
// It refreshes the rule from the alert-rule cache, builds an event for every
// anomaly point, drops muted events, groups the remaining events by tag hash
// and passes each group to handleEvent, and finally calls HandleRecover with
// the hashes of all points seen in this batch (muted ones included).
//
// from is forwarded to BuildEvent; inhibit is stored on the processor.
//
// When the rule is no longer in the cache the call logs an error and returns.
// The previous code stored the nil lookup result in arw.rule first and then
// called arw.Key(), which dereferences arw.rule and panicked. The last known
// rule is now kept, so Key() stays usable.
func (arw *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inhibit bool) {
	// Parts of the rule configuration (alert receivers, callbacks, ...) may
	// have changed. Such changes do not restart the worker but do affect how
	// alerts are handled, so the rule is re-read from the alert-rule cache and
	// overwritten here.
	arw.inhibit = inhibit
	cachedRule := arw.atertRuleCache.Get(arw.rule.Id)
	if cachedRule == nil {
		logger.Errorf("rule_eval:%s rule not found", arw.Key())
		return
	}
	arw.rule = cachedRule

	now := time.Now().Unix()
	alertingKeys := map[string]struct{}{}

	// Group the events by their tags so that alert inhibition can be handled.
	eventsMap := make(map[string][]*models.AlertCurEvent)
	for _, anomalyPoint := range anomalyPoints {
		event := arw.BuildEvent(anomalyPoint, from, now)
		// A muted event is still in the firing state, so its hash is always
		// added to alertingKeys; otherwise a firing event would be recovered
		// automatically.
		hash := event.Hash
		alertingKeys[hash] = struct{}{}
		if mute.IsMuted(cachedRule, event, arw.targetCache, arw.alertMuteCache) {
			logger.Debugf("rule_eval:%s event:%v is muted", arw.Key(), event)
			continue
		}
		tagHash := TagHash(anomalyPoint)
		eventsMap[tagHash] = append(eventsMap[tagHash], event)
	}

	for _, events := range eventsMap {
		arw.handleEvent(events)
	}

	arw.HandleRecover(alertingKeys, now)
}
// BuildEvent turns one anomaly point into a not-yet-recovered AlertCurEvent.
//
// It first refreshes the processor's tag and target state (fillTags,
// mayHandleIdent), then copies rule, tag, target and anomaly point data onto a
// new event generated from the rule. LastEvalTime is now when from is "inner";
// otherwise it equals the anomaly point's timestamp.
func (arw *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
	arw.fillTags(anomalyPoint)
	arw.mayHandleIdent()

	eventHash := Hash(arw.rule.Id, arw.datasourceId, anomalyPoint)

	// Events pushed from outside keep the point's own timestamp as their
	// evaluation time; internally evaluated ones use the current time.
	lastEvalTime := anomalyPoint.Timestamp
	if from == "inner" {
		lastEvalTime = now
	}

	event := arw.rule.GenerateNewEvent(arw.ctx)

	// identity and state
	event.Hash = eventHash
	event.DatasourceId = arw.datasourceId
	event.IsRecovered = false
	event.GroupName = arw.groupName

	// data taken from the anomaly point
	event.TriggerTime = anomalyPoint.Timestamp
	event.TriggerValue = anomalyPoint.ReadableValue()
	event.Severity = anomalyPoint.Severity
	event.LastEvalTime = lastEvalTime

	// tags and target resolved above
	event.TagsMap = arw.tagsMap
	event.TagsJSON = arw.tagsArr
	event.Tags = strings.Join(arw.tagsArr, ",,")
	event.TargetIdent = arw.target
	event.TargetNote = arw.targetNote

	// settings copied from the rule
	event.Callbacks = arw.rule.Callbacks
	event.CallbacksJSON = arw.rule.CallbacksJSON
	event.Annotations = arw.rule.Annotations
	event.AnnotationsJSON = arw.rule.AnnotationsJSON
	event.RuleConfig = arw.rule.RuleConfig
	event.RuleConfigJson = arw.rule.RuleConfigJson

	return event
}
// HandleRecover clears state for events that are no longer alerting: pending
// entries whose hash is absent from alertingKeys are deleted, and fired entries
// whose hash is absent are passed to RecoverSingle with no override value.
func (arw *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
	for _, pendingHash := range arw.pendings.Keys() {
		if _, stillAlerting := alertingKeys[pendingHash]; !stillAlerting {
			arw.pendings.Delete(pendingHash)
		}
	}

	for firedHash := range arw.fires.GetAll() {
		if _, stillAlerting := alertingKeys[firedHash]; !stillAlerting {
			arw.RecoverSingle(firedHash, now, nil)
		}
	}
}
// RecoverSingle marks the fired event identified by hash as recovered and
// pushes it to the event queue.
//
// Nothing happens when the processor has no rule, when no fired event exists
// for hash, or when the rule's RecoverDuration is positive and less than that
// many seconds have passed since the event's LastEvalTime. When value is
// non-nil it replaces the event's TriggerValue.
func (arw *Processor) RecoverSingle(hash string, now int64, value *string) {
	rule := arw.rule
	if rule == nil {
		return
	}

	firedEvent, found := arw.fires.Get(hash)
	if !found {
		return
	}

	// With an observation period configured the event must not be recovered
	// right away.
	if holdFor := rule.RecoverDuration; holdFor > 0 && now-firedEvent.LastEvalTime < holdFor {
		logger.Debugf("rule_eval:%s event:%v not recover", arw.Key(), firedEvent)
		return
	}

	if value != nil {
		firedEvent.TriggerValue = *value
	}

	// No vector crossing the threshold was found, so treat the value as
	// recovered. It is impossible to tell whether prometheus has data that
	// simply does not meet the threshold or lost points and has nothing to
	// return.
	arw.fires.Delete(hash)
	arw.pendings.Delete(hash)

	// The recovery may be caused by an edited promql, so the event should show
	// the latest promql, otherwise users get confused. Any other rule field may
	// have changed as well, so refresh them all.
	rule.UpdateEvent(firedEvent)
	firedEvent.IsRecovered = true
	firedEvent.LastEvalTime = now

	arw.pushEventToQueue(firedEvent)
}
// handleEvent decides which events of one tag group are ready to fire and
// passes them, together with the lowest severity value seen among them, to
// inhibitEvent.
//
// With PromForDuration == 0 every event fires immediately. Otherwise an event
// is tracked in pendings and fires once the time since its first trigger plus
// one evaluation interval reaches PromForDuration.
func (arw *Processor) handleEvent(events []*models.AlertCurEvent) {
	var fireEvents []*models.AlertCurEvent
	// Start at 4 so that any ready event with a smaller severity value
	// replaces it.
	severity := 4

	// markReady queues the event for firing and tracks the smallest severity.
	markReady := func(ev *models.AlertCurEvent) {
		fireEvents = append(fireEvents, ev)
		if ev.Severity < severity {
			severity = ev.Severity
		}
	}

	for _, event := range events {
		if event == nil {
			continue
		}

		if arw.rule.PromForDuration == 0 {
			markReady(event)
			continue
		}

		var firstTriggerTime int64
		if pending, known := arw.pendings.Get(event.Hash); known {
			arw.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
			firstTriggerTime = pending.TriggerTime
		} else {
			arw.pendings.Set(event.Hash, event)
			firstTriggerTime = event.TriggerTime
		}

		lasted := event.LastEvalTime - firstTriggerTime + int64(event.PromEvalInterval)
		if lasted >= int64(arw.rule.PromForDuration) {
			markReady(event)
		}
	}

	arw.inhibitEvent(fireEvents, severity)
}
// inhibitEvent fires the given events. When inhibition is enabled on the
// processor, events whose severity value is greater than highSeverity are
// logged and skipped instead of fired.
func (arw *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
	for _, event := range events {
		suppressed := arw.inhibit && event.Severity > highSeverity
		if !suppressed {
			arw.fireEvent(event)
			continue
		}
		logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", arw.Key(), event, highSeverity)
	}
}
// fireEvent pushes a firing event to the queue, honouring the rule's repeat
// settings.
//
// A hash that has not fired before is always sent, with NotifyCurNumber 1. For
// a hash that already fired, the stored LastEvalTime is refreshed and the event
// is sent again only when NotifyRepeatStep is non-zero, more than
// NotifyRepeatStep*60 seconds have passed since the last send, and
// NotifyMaxNumber is either 0 (unlimited) or not yet reached.
func (arw *Processor) fireEvent(event *models.AlertCurEvent) {
	// arw.rule is the rule most recently stored on the processor.
	rule := arw.rule
	if rule == nil {
		return
	}

	logger.Debugf("rule_eval:%s event:%+v fire", arw.Key(), event)

	fired, alreadyFired := arw.fires.Get(event.Hash)
	if !alreadyFired {
		event.NotifyCurNumber = 1
		event.FirstTriggerTime = event.TriggerTime
		arw.pushEventToQueue(event)
		return
	}

	arw.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)

	if rule.NotifyRepeatStep == 0 {
		// A zero repeat step means repeated notifications are not wanted, so
		// there is nothing more to do.
		logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", arw.Key(), event)
		return
	}

	// An alert was sent before; send again only after the silence window.
	if event.LastEvalTime <= fired.LastSentTime+int64(rule.NotifyRepeatStep)*60 {
		return
	}

	// A max number of 0 means no limit; otherwise stop once it is reached.
	if rule.NotifyMaxNumber != 0 && fired.NotifyCurNumber >= rule.NotifyMaxNumber {
		logger.Debugf("rule_eval:%s event:%+v reach max number", arw.Key(), event)
		return
	}

	event.NotifyCurNumber = fired.NotifyCurNumber + 1
	event.FirstTriggerTime = fired.FirstTriggerTime
	arw.pushEventToQueue(event)
}
// pushEventToQueue records a firing event in arw.fires (stamping LastSentTime
// with its LastEvalTime), increments the alerts counter labelled with the
// datasource id, logs the event and pushes it to the front of the event queue.
// A warning is logged when the queue rejects the event.
func (arw *Processor) pushEventToQueue(e *models.AlertCurEvent) {
	if firing := !e.IsRecovered; firing {
		e.LastSentTime = e.LastEvalTime
		arw.fires.Set(e.Hash, e)
	}

	datasourceLabel := fmt.Sprintf("%d", e.DatasourceId)
	arw.stats.CounterAlertsTotal.WithLabelValues(datasourceLabel).Inc()

	dispatch.LogEvent(e, "push_queue")

	if accepted := queue.EventQueue.PushFront(e); !accepted {
		logger.Warningf("event_push_queue: queue is full, event:%+v", e)
	}
}
// RecoverAlertCurEventFromDb resets the processor's in-memory event state.
// pendings always starts empty; fires is rebuilt from the current events stored
// in the database for this rule and datasource, keyed by event hash. When the
// query fails the error is logged and fires starts empty as well.
func (arw *Processor) RecoverAlertCurEventFromDb() {
	arw.pendings = NewAlertCurEventMap(nil)

	storedEvents, err := models.AlertCurEventGetByRuleIdAndCluster(arw.ctx, arw.rule.Id, arw.datasourceId)
	if err != nil {
		logger.Errorf("recover event from db for rule:%s failed, err:%s", arw.Key(), err)
		arw.fires = NewAlertCurEventMap(nil)
		return
	}

	firedByHash := make(map[string]*models.AlertCurEvent)
	for i := range storedEvents {
		stored := storedEvents[i]
		stored.DB2Mem()
		firedByHash[stored.Hash] = stored
	}

	arw.fires = NewAlertCurEventMap(firedByHash)
}
// fillTags rebuilds arw.tagsMap and arw.tagsArr for one anomaly point.
//
// The map starts with the point's labels, then every "key=value" entry of the
// rule's AppendTagsJSON is added. The value is rendered as a template in which
// $labels is the tag map and $value is the event's TriggerValue. Finally the
// "rulename" tag is set to the rule name.
//
// A value that fails to parse or execute is replaced by a "parse tag value
// failed" message. An entry without "=" is skipped.
func (arw *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
	// handle series tags
	tagsMap := make(map[string]string)
	for label, value := range anomalyPoint.Labels {
		tagsMap[string(label)] = string(value)
	}

	var e = &models.AlertCurEvent{
		TagsMap: tagsMap,
	}

	// handle rule tags
	for _, tag := range arw.rule.AppendTagsJSON {
		arr := strings.SplitN(tag, "=", 2)
		if len(arr) != 2 {
			// No "=" in the entry: there is no value to render, and indexing
			// arr[1] would panic, so skip the malformed tag.
			continue
		}
		tagKey, tagValue := arr[0], arr[1]

		var defs = []string{
			"{{$labels := .TagsMap}}",
			"{{$value := .TriggerValue}}",
		}
		text := strings.Join(append(defs, tagValue), "")

		t, err := template.New(fmt.Sprint(arw.rule.Id)).Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
		if err != nil {
			// t must not be executed after a parse failure; record the
			// failure as the tag value and move on.
			tagsMap[tagKey] = fmt.Sprintf("parse tag value failed, err:%s", err)
			continue
		}

		var body bytes.Buffer
		if err = t.Execute(&body, e); err != nil {
			tagsMap[tagKey] = fmt.Sprintf("parse tag value failed, err:%s", err)
			continue
		}

		tagsMap[tagKey] = body.String()
	}

	tagsMap["rulename"] = arw.rule.Name
	arw.tagsMap = tagsMap

	// handle tagsArr
	arw.tagsArr = labelMapToArr(tagsMap)
}
// mayHandleIdent resolves the target for the tags currently held in
// arw.tagsMap. When the tags contain "ident" and the target cache knows that
// ident, arw.target and arw.targetNote are set from the cached target;
// otherwise both are cleared.
func (arw *Processor) mayHandleIdent() {
	// The processor is reused for every anomaly point, so clear the values
	// left by the previous point first. Without this, a point that has no
	// "ident" tag, or an ident missing from the cache, would inherit the
	// previous point's target.
	arw.target = ""
	arw.targetNote = ""

	// handle ident
	ident, has := arw.tagsMap["ident"]
	if !has {
		return
	}

	if target, exists := arw.targetCache.Get(ident); exists {
		arw.target = target.Ident
		arw.targetNote = target.Note
	}
}
// mayHandleGroup looks up the rule's business group in the cache and, when it
// is found, stores its name in arw.groupName.
func (arw *Processor) mayHandleGroup() {
	// handle bg
	if busiGroup := arw.busiGroupCache.GetByBusiGroupId(arw.rule.GroupId); busiGroup != nil {
		arw.groupName = busiGroup.Name
	}
}
// labelMapToArr converts a label map into a slice of "label=value" strings
// sorted in ascending order. An empty map yields an empty, non-nil slice.
func labelMapToArr(m map[string]string) []string {
	pairs := make([]string, len(m))

	next := 0
	for name, val := range m {
		pairs[next] = fmt.Sprintf("%s=%s", name, val)
		next++
	}

	// Sorting only matters once there are at least two entries.
	if len(pairs) >= 2 {
		sort.Strings(pairs)
	}
	return pairs
}
// Hash returns the MD5 of the rule id, the point's label string, the datasource
// id and the point's severity joined with underscores. It is the event hash
// used by BuildEvent.
func Hash(ruleId, datasourceId int64, vector common.AnomalyPoint) string {
	labelText := vector.Labels.String()
	raw := fmt.Sprintf("%d_%s_%d_%d", ruleId, labelText, datasourceId, vector.Severity)
	return str.MD5(raw)
}
// TagHash returns the MD5 of the anomaly point's label string. Points with the
// same labels therefore share a tag hash regardless of their severity.
func TagHash(vector common.AnomalyPoint) string {
	labelText := vector.Labels.String()
	return str.MD5(labelText)
}
package queue
import (
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/toolkits/pkg/container/list"
)
// EventQueue is the package-level queue of alert events, created by
// list.NewSafeListLimited with a limit of 10000000.
var EventQueue = list.NewSafeListLimited(10000000)
// ReportQueueSize publishes the current length of EventQueue to the
// GaugeAlertQueueSize gauge once per second. It loops forever and never
// returns.
func ReportQueueSize(stats *astats.Stats) {
	for {
		time.Sleep(time.Second)

		queued := EventQueue.Len()
		stats.GaugeAlertQueueSize.Set(float64(queued))
	}
}
package engine
package record
import (
"context"
......@@ -6,40 +6,43 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/writer"
)
type RecordRuleContext struct {
cluster string
quit chan struct{}
datasourceId int64
quit chan struct{}
rule *models.RecordingRule
// writers *writer.WritersType
promClients *prom.PromClientMap
}
func NewRecordRuleContext(rule *models.RecordingRule, cluster string) *RecordRuleContext {
func NewRecordRuleContext(rule *models.RecordingRule, datasourceId int64, promClients *prom.PromClientMap, writers *writer.WritersType) *RecordRuleContext {
return &RecordRuleContext{
cluster: cluster,
quit: make(chan struct{}),
rule: rule,
datasourceId: datasourceId,
quit: make(chan struct{}),
rule: rule,
promClients: promClients,
//writers: writers,
}
}
func (rrc *RecordRuleContext) Key() string {
return fmt.Sprintf("record-%s-%d", rrc.cluster, rrc.rule.Id)
return fmt.Sprintf("record-%d-%d", rrc.datasourceId, rrc.rule.Id)
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%d_%s_%s",
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
rrc.rule.Id,
rrc.rule.PromEvalInterval,
rrc.rule.PromQl,
rrc.cluster,
rrc.datasourceId,
))
}
......@@ -71,26 +74,25 @@ func (rrc *RecordRuleContext) Eval() {
return
}
if config.ReaderClients.IsNil(rrc.cluster) {
if rrc.promClients.IsNil(rrc.datasourceId) {
logger.Errorf("eval:%s reader client is nil", rrc.Key())
return
}
value, warnings, err := config.ReaderClients.GetCli(rrc.cluster).Query(context.Background(), promql, time.Now())
value, warnings, err := rrc.promClients.GetCli(rrc.datasourceId).Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("eval:%d promql:%s, error:%v", rrc.Key(), promql, err)
logger.Errorf("eval:%s promql:%s, error:%v", rrc.Key(), promql, err)
return
}
if len(warnings) > 0 {
logger.Errorf("eval:%d promql:%s, warnings:%v", rrc.Key(), promql, warnings)
logger.Errorf("eval:%s promql:%s, warnings:%v", rrc.Key(), promql, warnings)
return
}
ts := conv.ConvertToTimeSeries(value, rrc.rule)
ts := ConvertToTimeSeries(value, rrc.rule)
if len(ts) != 0 {
for _, v := range ts {
writer.Writers.PushSample(rrc.rule.Name, v, rrc.cluster)
}
rrc.promClients.GetWriterCli(rrc.datasourceId).Write(ts)
}
}
......
package conv
package record
import (
"math"
"strings"
"time"
"github.com/didi/nightingale/v5/src/models"
"github.com/ccfos/nightingale/v6/models"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/prompb"
)
......
package record
import (
"context"
"fmt"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/writer"
)
// Scheduler keeps the set of running recording-rule contexts in line with the
// recording rule cache.
type Scheduler struct {
// recordRules holds the contexts this scheduler has started, keyed by
// RecordRuleContext.Hash().
recordRules map[string]*RecordRuleContext
// aconf supplies EngineDelay and the heartbeat endpoint read by the sync loop.
aconf aconf.Alert
// recordingRuleCache is the source of rule ids and rules for each sync.
recordingRuleCache *memsto.RecordingRuleCacheType
// promClients selects the datasource ids for a rule and is passed to every
// new context.
promClients *prom.PromClientMap
// writers is passed to every new context.
writers *writer.WritersType
stats *astats.Stats
}
// NewScheduler creates a Scheduler with an empty rule set and starts its
// LoopSyncRules goroutine with a background context before returning it.
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *Scheduler {
	s := new(Scheduler)
	s.recordRules = make(map[string]*RecordRuleContext)
	s.aconf = aconf
	s.recordingRuleCache = rrc
	s.promClients = promClients
	s.writers = writers
	s.stats = stats

	go s.LoopSyncRules(context.Background())

	return s
}
// LoopSyncRules waits EngineDelay seconds, then calls syncRecordRules every
// 9 seconds until ctx is done.
func (s *Scheduler) LoopSyncRules(ctx context.Context) {
	startupDelay := time.Duration(s.aconf.EngineDelay) * time.Second
	time.Sleep(startupDelay)

	const syncInterval = 9000 * time.Millisecond
	for {
		select {
		case <-time.After(syncInterval):
			s.syncRecordRules()
		case <-ctx.Done():
			return
		}
	}
}
// syncRecordRules reconciles the running recording-rule contexts with the
// cache.
//
// For every cached rule and every datasource the rule hits, a context is wanted
// when the datasource hash ring assigns the rule to this instance's heartbeat
// endpoint. Wanted contexts that are not running yet are prepared and started;
// running contexts that are no longer wanted are stopped and removed.
func (s *Scheduler) syncRecordRules() {
	wanted := make(map[string]*RecordRuleContext)

	for _, ruleId := range s.recordingRuleCache.GetRuleIds() {
		rule := s.recordingRuleCache.Get(ruleId)
		if rule == nil {
			continue
		}

		ringKey := fmt.Sprintf("%d", rule.Id)
		for _, dsId := range s.promClients.Hit(rule.DatasourceIdsJson) {
			if naming.DatasourceHashRing.IsHit(dsId, ringKey, s.aconf.Heartbeat.Endpoint) {
				candidate := NewRecordRuleContext(rule, dsId, s.promClients, s.writers)
				wanted[candidate.Hash()] = candidate
			}
		}
	}

	// start contexts that are wanted but not running yet
	for hash, candidate := range wanted {
		if _, running := s.recordRules[hash]; running {
			continue
		}
		candidate.Prepare()
		candidate.Start()
		s.recordRules[hash] = candidate
	}

	// stop contexts that are running but no longer wanted
	for hash, running := range s.recordRules {
		if _, keep := wanted[hash]; keep {
			continue
		}
		running.Stop()
		delete(s.recordRules, hash)
	}
}
package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/gin-gonic/gin"
)
// Router bundles the configuration, caches, stats, context and external
// processor registry passed to New. Config registers its pushEventToQueue and
// makeEvent methods as HTTP handlers.
type Router struct {
HTTP httpx.Config
Alert aconf.Alert
AlertMuteCache *memsto.AlertMuteCacheType
TargetCache *memsto.TargetCacheType
BusiGroupCache *memsto.BusiGroupCacheType
AlertStats *astats.Stats
Ctx *ctx.Context
ExternalProcessors *process.ExternalProcessorsType
}
// New returns a Router holding the given HTTP and alert configuration, caches,
// stats, context and external processors.
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
	astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
	rt := new(Router)

	// configuration
	rt.HTTP = httpConfig
	rt.Alert = alert

	// caches
	rt.AlertMuteCache = amc
	rt.TargetCache = tc
	rt.BusiGroupCache = bgc

	rt.AlertStats = astats
	rt.Ctx = ctx
	rt.ExternalProcessors = externalProcessors

	return rt
}
// Config registers the router's handlers on r under the "/v1/n9e" group:
// POST /event and POST /make-event.
func (rt *Router) Config(r *gin.Engine) {
	n9eV1 := r.Group("/v1/n9e")
	{
		n9eV1.POST("/event", rt.pushEventToQueue)
		n9eV1.POST("/make-event", rt.makeEvent)
	}
}
// Render writes a JSON response with HTTP status 200. When msg is non-nil the
// body is {"error": {"message": msg}}. Otherwise the body is
// {"data": data, "error": ""}, with a nil data replaced by an empty object.
func Render(c *gin.Context, data, msg interface{}) {
	if msg != nil {
		c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": msg}})
		return
	}

	payload := data
	if payload == nil {
		payload = struct{}{}
	}
	c.JSON(http.StatusOK, gin.H{"data": payload, "error": ""})
}
// Dangerous writes {"error": {"message": ...}} with HTTP status 200 when v is a
// non-empty string or an error. It does nothing for nil, an empty string or any
// other type. The code arguments are accepted but not used.
func Dangerous(c *gin.Context, v interface{}, code ...int) {
	if v == nil {
		return
	}

	var message string
	switch t := v.(type) {
	case string:
		if t == "" {
			return
		}
		message = t
	case error:
		message = t.Error()
	default:
		return
	}

	c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": message}})
}
......@@ -5,20 +5,21 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/engine"
"github.com/didi/nightingale/v5/src/server/naming"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
func pushEventToQueue(c *gin.Context) {
func (rt *Router) pushEventToQueue(c *gin.Context) {
var event *models.AlertCurEvent
ginx.BindJSON(c, &event)
if event.RuleId == 0 {
......@@ -40,7 +41,7 @@ func pushEventToQueue(c *gin.Context) {
event.TagsMap[arr[0]] = arr[1]
}
if engine.EventMuteStrategy(nil, event) {
if mute.EventMuteStrategy(event, rt.AlertMuteCache) {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)
return
......@@ -54,6 +55,10 @@ func pushEventToQueue(c *gin.Context) {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
if err := event.ParseRule("annotations"); err != nil {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
// 如果 rule_note 中有 ; 前缀,则使用 rule_note 替换 tags 中的内容
if strings.HasPrefix(event.RuleNote, ";") {
event.RuleNote = strings.TrimPrefix(event.RuleNote, ";")
......@@ -67,10 +72,10 @@ func pushEventToQueue(c *gin.Context) {
event.NotifyChannels = strings.Join(event.NotifyChannelsJSON, " ")
event.NotifyGroups = strings.Join(event.NotifyGroupsJSON, " ")
promstat.CounterAlertsTotal.WithLabelValues(event.Cluster).Inc()
rt.AlertStats.CounterAlertsTotal.WithLabelValues(event.Cluster).Inc()
engine.LogEvent(event, "http_push_queue")
if !engine.EventQueue.PushFront(event) {
dispatch.LogEvent(event, "http_push_queue")
if !queue.EventQueue.PushFront(event) {
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
ginx.Bomb(200, msg)
logger.Warningf(msg)
......@@ -79,35 +84,25 @@ func pushEventToQueue(c *gin.Context) {
}
type eventForm struct {
Alert bool `json:"alert"`
Vectors []conv.Vector `json:"vectors"`
RuleId int64 `json:"rule_id"`
Cluster string `json:"cluster"`
}
func judgeEvent(c *gin.Context) {
var form eventForm
ginx.BindJSON(c, &form)
ruleContext, exists := engine.GetExternalAlertRule(form.Cluster, form.RuleId)
if !exists {
ginx.Bomb(200, "rule not exists")
}
ruleContext.HandleVectors(form.Vectors, "http")
ginx.NewRender(c).Message(nil)
Alert bool `json:"alert"`
AnomalyPoints []common.AnomalyPoint `json:"vectors"`
RuleId int64 `json:"rule_id"`
DatasourceId int64 `json:"datasource_id"`
Inhibit bool `json:"inhibit"`
}
func makeEvent(c *gin.Context) {
func (rt *Router) makeEvent(c *gin.Context) {
var events []*eventForm
ginx.BindJSON(c, &events)
//now := time.Now().Unix()
for i := 0; i < len(events); i++ {
node, err := naming.ClusterHashRing.GetNode(events[i].Cluster, fmt.Sprintf("%d", events[i].RuleId))
node, err := naming.DatasourceHashRing.GetNode(events[i].DatasourceId, fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event:%+v get node err:%v", events[i], err)
ginx.Bomb(200, "event node not exists")
}
if node != config.C.Heartbeat.Endpoint {
if node != rt.Alert.Heartbeat.Endpoint {
err := forwardEvent(events[i], node)
if err != nil {
logger.Warningf("event:%+v forward err:%v", events[i], err)
......@@ -116,19 +111,18 @@ func makeEvent(c *gin.Context) {
continue
}
ruleContext, exists := engine.GetExternalAlertRule(events[i].Cluster, events[i].RuleId)
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
if !exists {
ginx.Bomb(200, "rule not exists")
}
if events[i].Alert {
go ruleContext.HandleVectors(events[i].Vectors, "http")
go ruleWorker.Handle(events[i].AnomalyPoints, "http", events[i].Inhibit)
} else {
for _, vector := range events[i].Vectors {
alertVector := engine.NewAlertVector(ruleContext, nil, vector, "http")
for _, vector := range events[i].AnomalyPoints {
readableString := vector.ReadableValue()
go ruleContext.RecoverSingle(alertVector.Hash(), vector.Timestamp, &readableString)
go ruleWorker.RecoverSingle(process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
}
}
}
......
......@@ -5,16 +5,17 @@ import (
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ibex"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/ibex"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/toolkits/pkg/logger"
)
func SendCallbacks(urls []string, event *models.AlertCurEvent) {
func SendCallbacks(ctx *ctx.Context, urls []string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, ibexConf aconf.Ibex) {
for _, url := range urls {
if url == "" {
continue
......@@ -22,7 +23,7 @@ func SendCallbacks(urls []string, event *models.AlertCurEvent) {
if strings.HasPrefix(url, "${ibex}") {
if !event.IsRecovered {
handleIbex(url, event)
handleIbex(ctx, url, event, targetCache, ibexConf)
}
continue
}
......@@ -59,7 +60,7 @@ type TaskCreateReply struct {
Dat int64 `json:"dat"` // task.id
}
func handleIbex(url string, event *models.AlertCurEvent) {
func handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, ibexConf aconf.Ibex) {
arr := strings.Split(url, "/")
var idstr string
......@@ -89,7 +90,7 @@ func handleIbex(url string, event *models.AlertCurEvent) {
return
}
tpl, err := models.TaskTplGet("id = ?", id)
tpl, err := models.TaskTplGet(ctx, "id = ?", id)
if err != nil {
logger.Errorf("event_callback_ibex: failed to get tpl: %v", err)
return
......@@ -102,7 +103,7 @@ func handleIbex(url string, event *models.AlertCurEvent) {
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(tpl.UpdateBy, tpl, host)
can, err := canDoIbex(ctx, tpl.UpdateBy, tpl, host, targetCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
return
......@@ -130,10 +131,10 @@ func handleIbex(url string, event *models.AlertCurEvent) {
var res TaskCreateReply
err = ibex.New(
config.C.Ibex.Address,
config.C.Ibex.BasicAuthUser,
config.C.Ibex.BasicAuthPass,
config.C.Ibex.Timeout,
ibexConf.Address,
ibexConf.BasicAuthUser,
ibexConf.BasicAuthPass,
ibexConf.Timeout,
).
Path("/ibex/v1/tasks").
In(in).
......@@ -154,9 +155,9 @@ func handleIbex(url string, event *models.AlertCurEvent) {
record := models.TaskRecord{
Id: res.Dat,
GroupId: tpl.GroupId,
IbexAddress: config.C.Ibex.Address,
IbexAuthUser: config.C.Ibex.BasicAuthUser,
IbexAuthPass: config.C.Ibex.BasicAuthPass,
IbexAddress: ibexConf.Address,
IbexAuthUser: ibexConf.BasicAuthUser,
IbexAuthPass: ibexConf.BasicAuthPass,
Title: in.Title,
Account: in.Account,
Batch: in.Batch,
......@@ -169,13 +170,13 @@ func handleIbex(url string, event *models.AlertCurEvent) {
CreateBy: in.Creator,
}
if err = record.Add(); err != nil {
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
}
}
func canDoIbex(username string, tpl *models.TaskTpl, host string) (bool, error) {
user, err := models.UserGetByUsername(username)
func canDoIbex(ctx *ctx.Context, username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType) (bool, error) {
user, err := models.UserGetByUsername(ctx, username)
if err != nil {
return false, err
}
......@@ -184,7 +185,7 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string) (bool, error)
return true, nil
}
target, has := memsto.TargetCache.Get(host)
target, has := targetCache.Get(host)
if !has {
return false, nil
}
......
......@@ -5,10 +5,10 @@ import (
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type dingtalkMarkdown struct {
......@@ -70,23 +70,6 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
}
}
func (ds *DingtalkSender) SendRaw(users []*models.User, title, message string) {
if len(users) == 0 {
return
}
urls, _ := ds.extract(users)
body := dingtalk{
Msgtype: "markdown",
Markdown: dingtalkMarkdown{
Title: title,
Text: message,
},
}
for _, url := range urls {
ds.doSend(url, body)
}
}
// extract urls and ats from Users
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
......
......@@ -5,11 +5,12 @@ import (
"html/template"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/toolkits/pkg/logger"
"gopkg.in/gomail.v2"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
"gopkg.in/gomail.v2"
)
var mailch chan *gomail.Message
......@@ -17,13 +18,14 @@ var mailch chan *gomail.Message
type EmailSender struct {
subjectTpl *template.Template
contentTpl *template.Template
smtp aconf.SMTPConfig
}
func (es *EmailSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || ctx.Rule == nil || ctx.Event == nil {
return
}
tos := es.extract(ctx.Users)
tos := extract(ctx.Users)
var subject string
if es.subjectTpl != nil {
......@@ -32,15 +34,10 @@ func (es *EmailSender) Send(ctx MessageContext) {
subject = ctx.Rule.Name
}
content := BuildTplMessage(es.contentTpl, ctx.Event)
WriteEmail(subject, content, tos)
}
func (es *EmailSender) SendRaw(users []*models.User, title, message string) {
tos := es.extract(users)
WriteEmail(title, message, tos)
es.WriteEmail(subject, content, tos)
}
func (es *EmailSender) extract(users []*models.User) []string {
func extract(users []*models.User) []string {
tos := make([]string, 0, len(users))
for _, u := range users {
if u.Email != "" {
......@@ -50,8 +47,8 @@ func (es *EmailSender) extract(users []*models.User) []string {
return tos
}
func SendEmail(subject, content string, tos []string) {
conf := config.C.SMTP
func (es *EmailSender) SendEmail(subject, content string, tos []string, stmp aconf.SMTPConfig) {
conf := stmp
d := gomail.NewDialer(conf.Host, conf.Port, conf.User, conf.Pass)
if conf.InsecureSkipVerify {
......@@ -60,7 +57,7 @@ func SendEmail(subject, content string, tos []string) {
m := gomail.NewMessage()
m.SetHeader("From", config.C.SMTP.From)
m.SetHeader("From", stmp.From)
m.SetHeader("To", tos...)
m.SetHeader("Subject", subject)
m.SetBody("text/html", content)
......@@ -71,10 +68,10 @@ func SendEmail(subject, content string, tos []string) {
}
}
func WriteEmail(subject, content string, tos []string) {
func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
m := gomail.NewMessage()
m.SetHeader("From", config.C.SMTP.From)
m.SetHeader("From", es.smtp.From)
m.SetHeader("To", tos...)
m.SetHeader("Subject", subject)
m.SetBody("text/html", content)
......@@ -94,10 +91,10 @@ func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
}
}
func StartEmailSender() {
func StartEmailSender(smtp aconf.SMTPConfig) {
mailch = make(chan *gomail.Message, 100000)
conf := config.C.SMTP
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
......
......@@ -5,10 +5,10 @@ import (
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type feishuContent struct {
......@@ -53,22 +53,6 @@ func (fs *FeishuSender) Send(ctx MessageContext) {
}
}
func (fs *FeishuSender) SendRaw(users []*models.User, title, message string) {
if len(users) == 0 {
return
}
urls, _ := fs.extract(users)
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: message,
},
}
for _, url := range urls {
fs.doSend(url, body)
}
}
func (fs *FeishuSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0, len(users))
......
......@@ -6,10 +6,10 @@ import (
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type MatterMostMessage struct {
......@@ -44,17 +44,6 @@ func (ms *MmSender) Send(ctx MessageContext) {
})
}
func (ms *MmSender) SendRaw(users []*models.User, title, message string) {
urls := ms.extract(users)
if len(urls) == 0 {
return
}
SendMM(MatterMostMessage{
Text: message,
Tokens: urls,
})
}
func (ms *MmSender) extract(users []*models.User) []string {
tokens := make([]string, 0, len(users))
for _, user := range users {
......
......@@ -2,31 +2,64 @@ package sender
import (
"bytes"
"os"
"os/exec"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/didi/nightingale/v5/src/notifier"
"github.com/didi/nightingale/v5/src/pkg/sys"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/sys"
)
func MayPluginNotify(noticeBytes []byte) {
func MayPluginNotify(noticeBytes []byte, notifyScript *memsto.NotifyScriptCacheType) {
if len(noticeBytes) == 0 {
return
}
alertingCallPlugin(noticeBytes)
alertingCallScript(noticeBytes)
alertingCallScript(noticeBytes, notifyScript)
}
func alertingCallScript(stdinBytes []byte) {
func alertingCallScript(stdinBytes []byte, notifyScript *memsto.NotifyScriptCacheType) {
// not enable or no notify.py? do nothing
if !config.C.Alerting.CallScript.Enable || config.C.Alerting.CallScript.ScriptPath == "" {
config := notifyScript.GetNotifyScript()
if !config.Enable || config.Content == "" {
return
}
fpath := config.C.Alerting.CallScript.ScriptPath
fpath := ".notify_scriptt"
if config.Type == 1 {
fpath = config.Content
} else {
rewrite := true
if file.IsExist(fpath) {
oldContent, err := file.ToString(fpath)
if err != nil {
logger.Errorf("event_notify: read script file err: %v", err)
return
}
if oldContent == config.Content {
rewrite = false
}
}
if rewrite {
_, err := file.WriteString(fpath, config.Content)
if err != nil {
logger.Errorf("event_notify: write script file err: %v", err)
return
}
err = os.Chmod(fpath, 0777)
if err != nil {
logger.Errorf("event_notify: chmod script file err: %v", err)
return
}
}
fpath = "./" + fpath
}
cmd := exec.Command(fpath)
cmd.Stdin = bytes.NewReader(stdinBytes)
......@@ -41,7 +74,7 @@ func alertingCallScript(stdinBytes []byte) {
return
}
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.C.Alerting.Timeout)*time.Millisecond)
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
if isTimeout {
if err == nil {
......@@ -62,16 +95,3 @@ func alertingCallScript(stdinBytes []byte) {
logger.Infof("event_notify: exec %s output: %s", fpath, buf.String())
}
// call notify.so via golang plugin build
// ig. etc/script/notify/notify.so
func alertingCallPlugin(stdinBytes []byte) {
if !config.C.Alerting.CallPlugin.Enable {
return
}
logger.Debugf("alertingCallPlugin begin")
logger.Debugf("payload:", string(stdinBytes))
notifier.Instance.Notify(stdinBytes)
logger.Debugf("alertingCallPlugin done")
}
......@@ -4,20 +4,14 @@ import (
"bytes"
"html/template"
"github.com/toolkits/pkg/slice"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
)
type (
// Sender 发送消息通知的接口
Sender interface {
Send(ctx MessageContext)
// SendRaw 发送原始消息,目前在notifyMaintainer时使用
SendRaw(users []*models.User, title, message string)
}
// MessageContext 一个event所生成的告警通知的上下文
......@@ -29,29 +23,25 @@ type (
)
func NewSender(key string, tpls map[string]*template.Template) Sender {
if !slice.ContainsString(config.C.Alerting.NotifyBuiltinChannels, key) {
return nil
}
switch key {
case models.Dingtalk:
return &DingtalkSender{tpl: tpls["dingtalk.tpl"]}
return &DingtalkSender{tpl: tpls[models.Dingtalk]}
case models.Wecom:
return &WecomSender{tpl: tpls["wecom.tpl"]}
return &WecomSender{tpl: tpls[models.Wecom]}
case models.Feishu:
return &FeishuSender{tpl: tpls["feishu.tpl"]}
return &FeishuSender{tpl: tpls[models.Feishu]}
case models.Email:
return &EmailSender{subjectTpl: tpls["subject.tpl"], contentTpl: tpls["mailbody.tpl"]}
return &EmailSender{subjectTpl: tpls["mailsubject"], contentTpl: tpls[models.Email]}
case models.Mm:
return &MmSender{tpl: tpls["mm.tpl"]}
return &MmSender{tpl: tpls[models.Mm]}
case models.Telegram:
return &TelegramSender{tpl: tpls["telegram.tpl"]}
return &TelegramSender{tpl: tpls[models.Telegram]}
}
return nil
}
func BuildMessageContext(rule *models.AlertRule, event *models.AlertCurEvent, uids []int64) MessageContext {
users := memsto.UserCache.GetByUserIds(uids)
func BuildMessageContext(rule *models.AlertRule, event *models.AlertCurEvent, uids []int64, userCache *memsto.UserCacheType) MessageContext {
users := userCache.GetByUserIds(uids)
return MessageContext{
Rule: rule,
Event: event,
......
......@@ -5,10 +5,10 @@ import (
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type TelegramMessage struct {
......@@ -38,14 +38,6 @@ func (ts *TelegramSender) Send(ctx MessageContext) {
})
}
// SendRaw delivers message as-is (no template rendering) through SendTelegram,
// addressed to the tokens that ts.extract returns for users. The title
// argument is accepted but not used.
func (ts *TelegramSender) SendRaw(users []*models.User, title, message string) {
	recipients := ts.extract(users)

	var msg TelegramMessage
	msg.Text = message
	msg.Tokens = recipients

	SendTelegram(msg)
}
func (ts *TelegramSender) extract(users []*models.User) []string {
tokens := make([]string, 0, len(users))
for _, user := range users {
......
......@@ -5,14 +5,14 @@ import (
"encoding/json"
"io/ioutil"
"net/http"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/toolkits/pkg/logger"
)
func SendWebhooks(webhooks []config.Webhook, event *models.AlertCurEvent) {
func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent) {
for _, conf := range webhooks {
if conf.Url == "" || !conf.Enable {
continue
......@@ -44,8 +44,9 @@ func SendWebhooks(webhooks []config.Webhook, event *models.AlertCurEvent) {
}
}
// todo add skip verify
client := http.Client{
Timeout: conf.TimeoutDuration,
Timeout: time.Duration(conf.Timeout) * time.Second,
}
var resp *http.Response
......
......@@ -5,10 +5,10 @@ import (
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type wecomMarkdown struct {
......@@ -41,19 +41,6 @@ func (ws *WecomSender) Send(ctx MessageContext) {
}
}
// SendRaw posts message, unrendered, as a "markdown" payload to every URL
// that ws.extract returns for users. The title argument is accepted but not
// used.
func (ws *WecomSender) SendRaw(users []*models.User, title, message string) {
	for _, target := range ws.extract(users) {
		// A fresh payload is built per target, as before.
		payload := wecom{Msgtype: "markdown"}
		payload.Markdown = wecomMarkdown{Content: message}
		ws.doSend(target, payload)
	}
}
func (ws *WecomSender) extract(users []*models.User) []string {
urls := make([]string, 0, len(users))
for _, user := range users {
......
package cconf
import (
"github.com/ccfos/nightingale/v6/pkg/cas"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/gin-gonic/gin"
)
// Center groups the settings of the center module (passed around as
// config.Center).
type Center struct {
// Plugins falls back to the package-level Plugins list when left empty (see PreCheck).
Plugins []Plugin
// BasicAuth, when non-empty, is installed via gin.BasicAuth on the /v1/n9e service route group.
BasicAuth gin.Accounts
// MetricsYamlFile is the path handed to LoadMetricsYaml; empty selects etc/metrics.yaml under runner.Cwd.
MetricsYamlFile string
// OpsYamlFile is the path handed to LoadOpsYaml; empty selects etc/ops.yaml under runner.Cwd.
OpsYamlFile string
BuiltinIntegrationsDir string
// I18NHeaderKey names the request header the router's languageDetector reads to set X-Language; empty disables detection.
I18NHeaderKey string
MetricDesc MetricDescType
TargetMetrics map[string]string
// AnonymousAccess selects which page routes are registered without rt.auth().
AnonymousAccess AnonymousAccess
JWTAuth JWTAuth
ProxyAuth ProxyAuth
// LDAP, OIDC, CAS and OAuth are the per-provider login settings.
// NOTE(review): presumably consumed by sso.Init, which receives the whole Center — confirm.
LDAP ldapx.Config
OIDC oidcx.Config
CAS cas.Config
OAuth oauth2x.Config
Ibex Ibex
}
// Plugin is one entry of the plugin catalogue (see the Plugins variable for
// the built-in entries). The struct tags give the JSON field names.
type Plugin struct {
Id int64 `json:"id"`
// Category takes values such as "timeseries" or "logging" in the built-in list.
Category string `json:"category"`
// Type is serialised as "plugin_type", e.g. "prometheus".
Type string `json:"plugin_type"`
// TypeName is serialised as "plugin_type_name", e.g. "Prometheus Like".
TypeName string `json:"plugin_type_name"`
}
// ProxyAuth holds the proxy-authentication settings.
// NOTE(review): the consumer of these fields is not visible in this chunk;
// the field notes below are read off the names — confirm against the auth
// middleware.
type ProxyAuth struct {
Enable bool
// presumably the request header that carries the already-authenticated user name — verify
HeaderUserNameKey string
// presumably the roles given to users first seen through the proxy — verify
DefaultRoles []string
}
// JWTAuth holds the JWT settings.
// NOTE(review): the code that reads these fields is not visible in this
// chunk; in particular the unit of the two expiry values cannot be told from
// here — confirm before relying on it.
type JWTAuth struct {
SigningKey string
AccessExpired int64
RefreshExpired int64
RedisKeyPrefix string
}
// AnonymousAccess lists page routes that Router.Config may register without
// the rt.auth() middleware.
type AnonymousAccess struct {
// PromQuerier: when true, /proxy/:id/*url and /query-range-batch are registered without rt.auth().
PromQuerier bool
// AlertDetail: when true, /alert-cur-event/:eid and /alert-his-event/:eid are registered without rt.auth().
AlertDetail bool
}
// Ibex holds the connection settings for the Ibex service.
// NOTE(review): the client that uses these values is not visible in this
// chunk; the unit of Timeout cannot be told from here — confirm.
type Ibex struct {
Address string
BasicAuthUser string
BasicAuthPass string
Timeout int64
}
// PreCheck fills in defaults: when no plugins were configured, the built-in
// Plugins list is used.
func (c *Center) PreCheck() {
	if len(c.Plugins) > 0 {
		return
	}
	c.Plugins = Plugins
}
package config
package cconf
import (
"path"
......@@ -8,13 +8,13 @@ import (
)
// metricDesc: the map is loaded before it is ever read, so there is no need to use a concurrent map for the metric description store
type metricDesc struct {
type MetricDescType struct {
CommonDesc map[string]string `yaml:",inline" json:"common"`
Zh map[string]string `yaml:"zh" json:"zh"`
En map[string]string `yaml:"en" json:"en"`
}
var MetricDesc metricDesc
var MetricDesc MetricDescType
// GetMetricDesc , if metric is not registered, empty string will be returned
func GetMetricDesc(lang, metric string) string {
......@@ -33,8 +33,8 @@ func GetMetricDesc(lang, metric string) string {
return MetricDesc.CommonDesc[metric]
}
func loadMetricsYaml() error {
fp := C.MetricsYamlFile
func LoadMetricsYaml(metricsYamlFile string) error {
fp := metricsYamlFile
if fp == "" {
fp = path.Join(runner.Cwd, "etc", "metrics.yaml")
}
......
package cconf
import (
"path"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/runner"
)
// Operations holds the operation definitions. LoadOpsYaml fills it from the
// ops yaml file; it stays empty when that file does not exist.
var Operations = Operation{}
// Operation is the top-level document of the ops yaml file.
type Operation struct {
Ops []Ops `yaml:"ops"`
}
// Ops is one named group of operations.
type Ops struct {
Name string `yaml:"name" json:"name"`
Cname string `yaml:"cname" json:"cname"`
// Ops lists the operation identifiers of this group; GetAllOps flattens them across groups.
Ops []string `yaml:"ops" json:"ops"`
}
// LoadOpsYaml reads the operations file into the package-level Operations.
// An empty opsYamlFile selects etc/ops.yaml under runner.Cwd. A file that
// does not exist is not an error: nil is returned and Operations is left
// untouched. Otherwise the result of file.ReadYaml is returned.
func LoadOpsYaml(opsYamlFile string) error {
	target := opsYamlFile
	if target == "" {
		target = path.Join(runner.Cwd, "etc", "ops.yaml")
	}

	if file.IsExist(target) {
		return file.ReadYaml(target, &Operations)
	}
	return nil
}
// GetAllOps flattens the Ops lists of every group in ops into one slice,
// keeping group order and in-group order. The result is nil when there is
// nothing to collect.
func GetAllOps(ops []Ops) []string {
	var flat []string
	for i := range ops {
		flat = append(flat, ops[i].Ops...)
	}
	return flat
}
package cconf
// Plugins is the built-in plugin list. Center.PreCheck installs it when the
// configured Center.Plugins is empty.
var Plugins = []Plugin{
{
Id: 1,
Category: "timeseries",
Type: "prometheus",
TypeName: "Prometheus Like",
},
{
Id: 2,
Category: "logging",
Type: "elasticsearch",
TypeName: "Elasticsearch",
},
{
Id: 3,
Category: "logging",
Type: "jaeger",
TypeName: "Jaeger",
},
}
package center
import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/sso"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
centerrt "github.com/ccfos/nightingale/v6/center/router"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/toolkits/pkg/i18n"
)
// Initialize wires up the center process. It loads configuration from
// configDir (cryptoKey is passed through to conf.InitConfig), loads the
// metrics and ops yaml files, initialises logging, i18n, the database and
// redis, builds the in-memory caches, starts the alert engine, mounts the
// center, alert and pushgw routers on one gin engine and hands that engine to
// httpx.Init.
//
// On success it returns a cleanup function that runs the logger cleanup and
// then the HTTP cleanup. On failure it returns a nil cleanup and the error;
// if the logger had already been initialised it is released first.
func Initialize(configDir string, cryptoKey string) (func(), error) {
	config, err := conf.InitConfig(configDir, cryptoKey)
	if err != nil {
		return nil, fmt.Errorf("failed to init config: %v", err)
	}

	// A missing ops file already yields nil from its loader, so an error here
	// means the file exists but could not be read or parsed — report it
	// instead of silently continuing with partial data.
	if err := cconf.LoadMetricsYaml(config.Center.MetricsYamlFile); err != nil {
		return nil, fmt.Errorf("failed to load metrics yaml: %v", err)
	}
	if err := cconf.LoadOpsYaml(config.Center.OpsYamlFile); err != nil {
		return nil, fmt.Errorf("failed to load ops yaml: %v", err)
	}

	logxClean, err := logx.Init(config.Log)
	if err != nil {
		return nil, err
	}

	i18n.Init()

	db, err := storage.New(config.DB)
	if err != nil {
		// The caller gets no cleanup func on error, so release the logger here.
		logxClean()
		return nil, err
	}
	// Named appCtx so the imported ctx package is not shadowed.
	appCtx := ctx.NewContext(context.Background(), db)
	models.InitRoot(appCtx)

	redisCli, err := storage.NewRedis(config.Redis)
	if err != nil {
		logxClean()
		return nil, err
	}

	syncStats := memsto.NewSyncStats()
	alertStats := astats.NewSyncStats()
	identSet := idents.New(db, config.Pushgw.DatasourceId, config.Pushgw.MaxOffset)
	ssoClient := sso.Init(config.Center, appCtx)

	busiGroupCache := memsto.NewBusiGroupCache(appCtx, syncStats)
	targetCache := memsto.NewTargetCache(appCtx, syncStats)
	dsCache := memsto.NewDatasourceCache(appCtx, syncStats)
	alertMuteCache := memsto.NewAlertMuteCache(appCtx, syncStats)
	alertRuleCache := memsto.NewAlertRuleCache(appCtx, syncStats)

	promClients := prom.NewPromClient(appCtx, config.Alert.Heartbeat)

	externalProcessors := process.NewExternalProcessors()
	alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, appCtx, promClients)

	writers := writer.NewWriters(config.Pushgw)

	alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, appCtx, externalProcessors)
	centerRouter := centerrt.New(config.HTTP, config.Center, cconf.Operations, dsCache, promClients, redisCli, ssoClient, appCtx)
	pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, identSet, writers, appCtx)

	r := httpx.GinEngine(config.Global.RunMode, config.HTTP)

	// Registration order is kept: center first, then alert, then pushgw.
	centerRouter.Config(r)
	alertrtRouter.Config(r)
	pushgwRouter.Config(r)

	httpClean := httpx.Init(config.HTTP, r)

	return func() {
		logxClean()
		httpClean()
	}, nil
}
package stat
package cstats
import (
"time"
......@@ -6,7 +6,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const Service = "n9e-webapi"
const Service = "n9e-center"
var (
labels = []string{"service", "code", "path", "method"}
......
package router
import (
"fmt"
"net/http"
"path"
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/sso"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/ginx"
"github.com/gin-gonic/gin"
)
// Router bundles the configuration and shared dependencies that the center
// HTTP handlers (methods on *Router) have access to. New populates every
// field.
type Router struct {
// HTTP is the HTTP server configuration handed to New.
HTTP httpx.Config
// Center supplies the settings consulted while registering routes (I18NHeaderKey, AnonymousAccess, BasicAuth).
Center cconf.Center
// Operations is the operation catalogue handed to New.
Operations cconf.Operation
DatasourceCache *memsto.DatasourceCacheType
PromClients *prom.PromClientMap
Redis storage.Redis
Sso *sso.SsoClient
// Ctx is the application context that handlers pass to the models package.
Ctx *ctx.Context
}
// New builds a Router from its dependencies; each argument is stored in the
// field of the matching name without further processing.
func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operation, ds *memsto.DatasourceCacheType, pc *prom.PromClientMap,
	redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context) *Router {
	rt := new(Router)

	rt.HTTP = httpConfig
	rt.Center = center
	rt.Operations = operations
	rt.DatasourceCache = ds
	rt.PromClients = pc
	rt.Redis = redis
	rt.Sso = sso
	rt.Ctx = ctx

	return rt
}
// stat returns a middleware that, once the rest of the handler chain has run,
// increments cstats.RequestCounter and records the elapsed seconds in
// cstats.RequestDuration, labelled with service, status code, route pattern
// and HTTP method.
func stat() gin.HandlerFunc {
	return func(c *gin.Context) {
		begin := time.Now()
		c.Next()

		// Label order must match the order the metrics were declared with.
		values := []string{
			cstats.Service,
			fmt.Sprintf("%d", c.Writer.Status()),
			c.FullPath(),
			c.Request.Method,
		}

		cstats.RequestCounter.WithLabelValues(values...).Inc()
		cstats.RequestDuration.WithLabelValues(values...).Observe(time.Since(begin).Seconds())
	}
}
// languageDetector returns a middleware that normalises the request language
// into the X-Language request header. The language is read from the header
// named by i18NHeaderKey: values starting with "zh" become "zh", values
// starting with "en" (or an empty/missing header) become "en", and anything
// else is copied through unchanged. When i18NHeaderKey is empty the request
// is left untouched.
func languageDetector(i18NHeaderKey string) gin.HandlerFunc {
	return func(c *gin.Context) {
		if i18NHeaderKey != "" {
			lang := c.GetHeader(i18NHeaderKey)
			switch {
			case lang == "", strings.HasPrefix(lang, "en"):
				lang = "en"
			case strings.HasPrefix(lang, "zh"):
				lang = "zh"
			}
			c.Request.Header.Set("X-Language", lang)
		}

		c.Next()
	}
}
// configNoRoute installs the fallback for requests that match no registered
// route. If the text after the last "." of the URL path is one of the known
// static-asset suffixes, the matching file under pub/ is served; every other
// request gets pub/index.html.
func (rt *Router) configNoRoute(r *gin.Engine) {
	r.NoRoute(func(c *gin.Context) {
		reqPath := c.Request.URL.Path
		// With no "." present LastIndex yields -1, so the whole path is taken.
		ext := reqPath[strings.LastIndex(reqPath, ".")+1:]

		switch ext {
		case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "zip", "map":
			// Clean collapses empty and dot segments just as joining the
			// "/"-separated pieces does.
			c.File(path.Clean("pub/" + reqPath))
		default:
			c.File(path.Join("pub", "index.html"))
		}
	})
}
// Config registers everything the center serves on r: the global middleware
// (stat, languageDetector, aop.Recovery), the page API under /api/n9e, the
// service API under /v1/n9e, and finally the NoRoute fallback.
func (rt *Router) Config(r *gin.Engine) {
r.Use(stat())
r.Use(languageDetector(rt.Center.I18NHeaderKey))
r.Use(aop.Recovery())
pagesPrefix := "/api/n9e"
pages := r.Group(pagesPrefix)
{
// Datasource proxy and batch query: rt.auth() is left out when AnonymousAccess.PromQuerier is set.
if rt.Center.AnonymousAccess.PromQuerier {
pages.Any("/proxy/:id/*url", rt.dsProxy)
pages.POST("/query-range-batch", rt.promBatchQueryRange)
} else {
pages.Any("/proxy/:id/*url", rt.auth(), rt.dsProxy)
pages.POST("/query-range-batch", rt.auth(), rt.promBatchQueryRange)
}
// Login, logout, token refresh and the SSO redirect/callback endpoints.
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.logoutPost)
pages.POST("/auth/refresh", rt.jwtMock(), rt.refreshPost)
pages.GET("/auth/sso-config", rt.ssoConfigNameGet)
pages.GET("/auth/redirect", rt.loginRedirect)
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
pages.GET("/auth/callback", rt.loginCallback)
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
// Lookups registered without rt.auth().
pages.GET("/metrics/desc", rt.metricsDescGetFile)
pages.POST("/metrics/desc", rt.metricsDescGetMap)
pages.GET("/notify-channels", rt.notifyChannelsGets)
pages.GET("/contact-keys", rt.contactKeysGets)
// Current user: permissions, profile, password.
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
pages.PUT("/self/profile", rt.auth(), rt.user(), rt.selfProfilePut)
pages.PUT("/self/password", rt.auth(), rt.user(), rt.selfPasswordPut)
// User management; the mutating routes go through rt.admin().
pages.GET("/users", rt.auth(), rt.user(), rt.perm("/users"), rt.userGets)
pages.POST("/users", rt.auth(), rt.admin(), rt.userAddPost)
pages.GET("/user/:id/profile", rt.auth(), rt.userProfileGet)
pages.PUT("/user/:id/profile", rt.auth(), rt.admin(), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.admin(), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.admin(), rt.userDel)
// Metric views.
pages.GET("/metric-views", rt.auth(), rt.metricViewGets)
pages.DELETE("/metric-views", rt.auth(), rt.user(), rt.metricViewDel)
pages.POST("/metric-views", rt.auth(), rt.user(), rt.metricViewAdd)
pages.PUT("/metric-views", rt.auth(), rt.user(), rt.metricViewPut)
// User groups.
pages.GET("/user-groups", rt.auth(), rt.user(), rt.userGroupGets)
pages.POST("/user-groups", rt.auth(), rt.user(), rt.perm("/user-groups/add"), rt.userGroupAdd)
pages.GET("/user-group/:id", rt.auth(), rt.user(), rt.userGroupGet)
pages.PUT("/user-group/:id", rt.auth(), rt.user(), rt.perm("/user-groups/put"), rt.userGroupWrite(), rt.userGroupPut)
pages.DELETE("/user-group/:id", rt.auth(), rt.user(), rt.perm("/user-groups/del"), rt.userGroupWrite(), rt.userGroupDel)
pages.POST("/user-group/:id/members", rt.auth(), rt.user(), rt.perm("/user-groups/put"), rt.userGroupWrite(), rt.userGroupMemberAdd)
pages.DELETE("/user-group/:id/members", rt.auth(), rt.user(), rt.perm("/user-groups/put"), rt.userGroupWrite(), rt.userGroupMemberDel)
// Business groups; rt.bgro()/rt.bgrw() are added per route on top of rt.perm().
pages.GET("/busi-groups", rt.auth(), rt.user(), rt.busiGroupGets)
pages.POST("/busi-groups", rt.auth(), rt.user(), rt.perm("/busi-groups/add"), rt.busiGroupAdd)
pages.GET("/busi-groups/alertings", rt.auth(), rt.busiGroupAlertingsGets)
pages.GET("/busi-group/:id", rt.auth(), rt.user(), rt.bgro(), rt.busiGroupGet)
pages.PUT("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupPut)
pages.POST("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberAdd)
pages.DELETE("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberDel)
pages.DELETE("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/del"), rt.bgrw(), rt.busiGroupDel)
pages.GET("/busi-group/:id/perm/:perm", rt.auth(), rt.user(), rt.checkBusiGroupPerm)
// Targets and their tags, note and business-group binding.
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
pages.GET("/targets/tags", rt.auth(), rt.user(), rt.targetGetTags)
pages.POST("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindTagsByFE)
pages.DELETE("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUnbindTagsByFE)
pages.PUT("/targets/note", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateNote)
pages.PUT("/targets/bgid", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateBgid)
// Built-in boards and integration icons.
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
pages.GET("/builtin-boards", rt.builtinBoardGets)
pages.GET("/builtin-board/:name", rt.builtinBoardGet)
pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
// Serves integrations/<cate>/icon/<name> from a path relative to the working directory.
pages.GET("/integrations/icon/:cate/:name", func(c *gin.Context) {
cate := ginx.UrlParamStr(c, "cate")
fp := "integrations/" + cate + "/icon/" + ginx.UrlParamStr(c, "name")
c.File(path.Join(fp))
})
// Dashboards (boards).
pages.GET("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.bgro(), rt.boardGets)
pages.POST("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardAdd)
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
pages.GET("/board/:bid", rt.boardGet)
pages.GET("/board/:bid/pure", rt.boardPureGet)
pages.PUT("/board/:bid", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPut)
pages.PUT("/board/:bid/configs", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPutConfigs)
pages.PUT("/board/:bid/public", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPutPublic)
pages.DELETE("/boards", rt.auth(), rt.user(), rt.perm("/dashboards/del"), rt.boardDel)
// Shared charts.
pages.GET("/share-charts", rt.chartShareGets)
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
// Alert rules.
pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.alertRuleBuiltinList)
pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
pages.POST("/busi-group/:id/alert-rules/builtin", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleBuiltinImport)
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
pages.POST("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByFE)
pages.DELETE("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/del"), rt.bgrw(), rt.alertRuleDel)
pages.PUT("/busi-group/:id/alert-rules/fields", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.bgrw(), rt.alertRulePutFields)
pages.PUT("/busi-group/:id/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRulePutByFE)
pages.GET("/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGet)
// Recording rules.
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
// Alert mutes.
pages.GET("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.bgro(), rt.alertMuteGetsByBG)
pages.POST("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMuteAdd)
pages.DELETE("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/del"), rt.bgrw(), rt.alertMuteDel)
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
// Alert subscriptions.
pages.GET("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.bgro(), rt.alertSubscribeGets)
pages.GET("/alert-subscribe/:sid", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGet)
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
// Single alert event detail: rt.auth() is left out when AnonymousAccess.AlertDetail is set.
if rt.Center.AnonymousAccess.AlertDetail {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
} else {
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.auth(), rt.alertHisEventGet)
}
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.alertCurEventsList)
pages.GET("/alert-cur-events/card", rt.auth(), rt.alertCurEventsCard)
pages.POST("/alert-cur-events/card/details", rt.auth(), rt.alertCurEventsCardDetails)
pages.GET("/alert-his-events/list", rt.auth(), rt.alertHisEventsList)
pages.DELETE("/alert-cur-events", rt.auth(), rt.user(), rt.perm("/alert-cur-events/del"), rt.alertCurEventDel)
// Alert aggregation views.
pages.GET("/alert-aggr-views", rt.auth(), rt.alertAggrViewGets)
pages.DELETE("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewDel)
pages.POST("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewAdd)
pages.PUT("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewPut)
// Task templates and tasks (permission keys use the "/job-tpls" and "/job-tasks" prefixes).
pages.GET("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGets)
pages.POST("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls/add"), rt.bgrw(), rt.taskTplAdd)
pages.DELETE("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/del"), rt.bgrw(), rt.taskTplDel)
pages.POST("/busi-group/:id/task-tpls/tags", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplBindTags)
pages.DELETE("/busi-group/:id/task-tpls/tags", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplUnbindTags)
pages.GET("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGet)
pages.PUT("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplPut)
pages.GET("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.bgro(), rt.taskGets)
pages.POST("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks/add"), rt.bgrw(), rt.taskAdd)
pages.GET("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskProxy)
pages.PUT("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks/put"), rt.bgrw(), rt.taskProxy)
// Servers and datasources.
pages.GET("/servers", rt.auth(), rt.admin(), rt.serversGet)
pages.GET("/server-clusters", rt.auth(), rt.admin(), rt.serverClustersGet)
pages.POST("/datasource/list", rt.auth(), rt.datasourceList)
pages.POST("/datasource/plugin/list", rt.auth(), rt.pluginList)
pages.POST("/datasource/upsert", rt.auth(), rt.admin(), rt.datasourceUpsert)
pages.POST("/datasource/desc", rt.auth(), rt.admin(), rt.datasourceGet)
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
// Roles and their operations.
pages.GET("/roles", rt.auth(), rt.admin(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.admin(), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.admin(), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.admin(), rt.roleDel)
pages.GET("/role/:id/ops", rt.auth(), rt.admin(), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
// NOTE(review): unlike its neighbours this path has no leading slash and no rt.auth() — confirm both are intended.
pages.GET("operation", rt.operations)
// Notification templates, SSO config, webhooks, notify script/channel/contact: all behind rt.admin().
pages.GET("/notify-tpls", rt.auth(), rt.admin(), rt.notifyTplGets)
pages.PUT("/notify-tpl/content", rt.auth(), rt.admin(), rt.notifyTplUpdateContent)
pages.PUT("/notify-tpl", rt.auth(), rt.admin(), rt.notifyTplUpdate)
pages.POST("/notify-tpl/preview", rt.auth(), rt.admin(), rt.notifyTplPreview)
pages.GET("/sso-configs", rt.auth(), rt.admin(), rt.ssoConfigGets)
pages.PUT("/sso-config", rt.auth(), rt.admin(), rt.ssoConfigUpdate)
pages.GET("/webhooks", rt.auth(), rt.admin(), rt.webhookGets)
pages.PUT("/webhooks", rt.auth(), rt.admin(), rt.webhookPuts)
pages.GET("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptGet)
pages.PUT("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptPut)
pages.GET("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelGets)
pages.PUT("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelPuts)
pages.GET("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactGets)
pages.PUT("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactPuts)
}
// Service API: none of these routes use rt.auth(); the group is guarded by HTTP basic auth
// only when Center.BasicAuth has at least one account.
service := r.Group("/v1/n9e")
if len(rt.Center.BasicAuth) > 0 {
service.Use(gin.BasicAuth(rt.Center.BasicAuth))
}
{
service.Any("/prometheus/*url", rt.dsProxy)
service.POST("/users", rt.userAddPost)
service.GET("/users", rt.userFindAll)
service.GET("/targets", rt.targetGets)
service.GET("/targets/tags", rt.targetGetTags)
service.POST("/targets/tags", rt.targetBindTagsByService)
service.DELETE("/targets/tags", rt.targetUnbindTagsByService)
service.PUT("/targets/note", rt.targetUpdateNoteByService)
service.POST("/alert-rules", rt.alertRuleAddByService)
service.DELETE("/alert-rules", rt.alertRuleDelByService)
service.PUT("/alert-rule/:arid", rt.alertRulePutByService)
service.GET("/alert-rule/:arid", rt.alertRuleGet)
service.GET("/alert-rules", rt.alertRulesGetByService)
service.GET("/alert-mutes", rt.alertMuteGets)
service.POST("/alert-mutes", rt.alertMuteAddByService)
service.DELETE("/alert-mutes", rt.alertMuteDel)
service.GET("/alert-cur-events", rt.alertCurEventsList)
service.GET("/alert-his-events", rt.alertHisEventsList)
service.GET("/alert-his-event/:eid", rt.alertHisEventGet)
service.GET("/config/:id", rt.configGet)
service.GET("/configs", rt.configsGet)
service.PUT("/configs", rt.configsPut)
service.POST("/configs", rt.configsPost)
service.DELETE("/configs", rt.configsDel)
service.POST("/conf-prop/encrypt", rt.confPropEncrypt)
service.POST("/conf-prop/decrypt", rt.confPropDecrypt)
}
// Installed last: anything not matched above falls through to the static-file / index.html handler.
rt.configNoRoute(r)
}
// Render writes the JSON envelope with HTTP status 200. With a non-nil msg
// the body is {"error": {"message": msg}}; otherwise it is
// {"data": data, "error": ""}, where a nil data is replaced by an empty
// object.
func Render(c *gin.Context, data, msg interface{}) {
	if msg != nil {
		c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": msg}})
		return
	}

	payload := data
	if payload == nil {
		payload = struct{}{}
	}
	c.JSON(http.StatusOK, gin.H{"data": payload, "error": ""})
}
// Dangerous writes {"error": {"message": ...}} with HTTP status 200 when v is
// a non-empty string or an error (using its Error() text). Nil, an empty
// string, or a value of any other type produce no output. The variadic code
// argument is accepted but not used.
func Dangerous(c *gin.Context, v interface{}, code ...int) {
	var message interface{}

	switch val := v.(type) {
	case nil:
		return
	case string:
		if val == "" {
			return
		}
		message = v
	case error:
		message = val.Error()
	default:
		return
	}

	c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": message}})
}
......@@ -3,19 +3,20 @@ package router
import (
"net/http"
"github.com/didi/nightingale/v5/src/models"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param
func alertAggrViewGets(c *gin.Context) {
lst, err := models.AlertAggrViewGets(c.MustGet("userid"))
func (rt *Router) alertAggrViewGets(c *gin.Context) {
lst, err := models.AlertAggrViewGets(rt.Ctx, c.MustGet("userid"))
ginx.NewRender(c).Data(lst, err)
}
// body: name, rule, cate
func alertAggrViewAdd(c *gin.Context) {
func (rt *Router) alertAggrViewAdd(c *gin.Context) {
var f models.AlertAggrView
ginx.BindJSON(c, &f)
......@@ -27,31 +28,31 @@ func alertAggrViewAdd(c *gin.Context) {
f.Id = 0
f.CreateBy = me.Id
ginx.Dangerous(f.Add())
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f, nil)
}
// body: ids
func alertAggrViewDel(c *gin.Context) {
func (rt *Router) alertAggrViewDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
me := c.MustGet("user").(*models.User)
if me.IsAdmin() {
ginx.NewRender(c).Message(models.AlertAggrViewDel(f.Ids))
ginx.NewRender(c).Message(models.AlertAggrViewDel(rt.Ctx, f.Ids))
} else {
ginx.NewRender(c).Message(models.AlertAggrViewDel(f.Ids, me.Id))
ginx.NewRender(c).Message(models.AlertAggrViewDel(rt.Ctx, f.Ids, me.Id))
}
}
// body: id, name, rule, cate
func alertAggrViewPut(c *gin.Context) {
func (rt *Router) alertAggrViewPut(c *gin.Context) {
var f models.AlertAggrView
ginx.BindJSON(c, &f)
view, err := models.AlertAggrViewGet("id = ?", f.Id)
view, err := models.AlertAggrViewGet(rt.Ctx, "id = ?", f.Id)
ginx.Dangerous(err)
if view == nil {
......@@ -69,5 +70,5 @@ func alertAggrViewPut(c *gin.Context) {
}
}
ginx.NewRender(c).Message(view.Update(f.Name, f.Rule, f.Cate, me.Id))
ginx.NewRender(c).Message(view.Update(rt.Ctx, f.Name, f.Rule, f.Cate, me.Id))
}
......@@ -5,10 +5,10 @@ import (
"sort"
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/didi/nightingale/v5/src/models"
)
func parseAggrRules(c *gin.Context) []*models.AggrRule {
......@@ -38,14 +38,20 @@ func parseAggrRules(c *gin.Context) []*models.AggrRule {
return rules
}
func alertCurEventsCard(c *gin.Context) {
func (rt *Router) alertCurEventsCard(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
query := ginx.QueryStr(c, "query", "")
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
clusters := queryClusters(c)
dsIds := queryDatasourceIds(c)
rules := parseAggrRules(c)
prod := ginx.QueryStr(c, "prod", "")
prod := ginx.QueryStr(c, "prods", "")
prods := []string{}
if prod != "" {
prods = strings.Split(prod, ",")
}
cate := ginx.QueryStr(c, "cate", "$all")
cates := []string{}
if cate != "$all" {
......@@ -53,7 +59,7 @@ func alertCurEventsCard(c *gin.Context) {
}
// Fetch at most 50000 events; fetching more would be pointless
list, err := models.AlertCurEventGets(prod, busiGroupId, stime, etime, severity, clusters, cates, query, 50000, 0)
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, 50000, 0)
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
......@@ -104,15 +110,15 @@ type AlertCard struct {
Severity int `json:"severity"`
}
func alertCurEventsCardDetails(c *gin.Context) {
func (rt *Router) alertCurEventsCardDetails(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
list, err := models.AlertCurEventGetByIds(f.Ids)
list, err := models.AlertCurEventGetByIds(rt.Ctx, f.Ids)
if err == nil {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(list); i++ {
list[i].FillNotifyGroups(cache)
list[i].FillNotifyGroups(rt.Ctx, cache)
}
}
......@@ -120,29 +126,39 @@ func alertCurEventsCardDetails(c *gin.Context) {
}
// List view: fetch the active alerts
func alertCurEventsList(c *gin.Context) {
func (rt *Router) alertCurEventsList(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
clusters := queryClusters(c)
prod := ginx.QueryStr(c, "prod", "")
dsIds := queryDatasourceIds(c)
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
prod = ginx.QueryStr(c, "rule_prods", "")
}
prods := []string{}
if prod != "" {
prods = strings.Split(prod, ",")
}
cate := ginx.QueryStr(c, "cate", "$all")
cates := []string{}
if cate != "$all" {
cates = strings.Split(cate, ",")
}
total, err := models.AlertCurEventTotal(prod, busiGroupId, stime, etime, severity, clusters, cates, query)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query)
ginx.Dangerous(err)
list, err := models.AlertCurEventGets(prod, busiGroupId, stime, etime, severity, clusters, cates, query, limit, ginx.Offset(c, limit))
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(list); i++ {
list[i].FillNotifyGroups(cache)
list[i].FillNotifyGroups(rt.Ctx, cache)
}
ginx.NewRender(c).Data(gin.H{
......@@ -151,7 +167,7 @@ func alertCurEventsList(c *gin.Context) {
}, nil)
}
func alertCurEventDel(c *gin.Context) {
func (rt *Router) alertCurEventDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
......@@ -159,21 +175,21 @@ func alertCurEventDel(c *gin.Context) {
set := make(map[int64]struct{})
for i := 0; i < len(f.Ids); i++ {
event, err := models.AlertCurEventGetById(f.Ids[i])
event, err := models.AlertCurEventGetById(rt.Ctx, f.Ids[i])
ginx.Dangerous(err)
if _, has := set[event.GroupId]; !has {
bgrwCheck(c, event.GroupId)
rt.bgrwCheck(c, event.GroupId)
set[event.GroupId] = struct{}{}
}
}
ginx.NewRender(c).Message(models.AlertCurEventDel(f.Ids))
ginx.NewRender(c).Message(models.AlertCurEventDel(rt.Ctx, f.Ids))
}
func alertCurEventGet(c *gin.Context) {
func (rt *Router) alertCurEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertCurEventGetById(eid)
event, err := models.AlertCurEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
if event == nil {
......
......@@ -4,10 +4,10 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/didi/nightingale/v5/src/models"
)
func getTimeRange(c *gin.Context) (stime, etime int64) {
......@@ -26,7 +26,7 @@ func getTimeRange(c *gin.Context) (stime, etime int64) {
return
}
func alertHisEventsList(c *gin.Context) {
func (rt *Router) alertHisEventsList(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
......@@ -34,23 +34,33 @@ func alertHisEventsList(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
clusters := queryClusters(c)
prod := ginx.QueryStr(c, "prod", "")
dsIds := queryDatasourceIds(c)
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
prod = ginx.QueryStr(c, "rule_prods", "")
}
prods := []string{}
if prod != "" {
prods = strings.Split(prod, ",")
}
cate := ginx.QueryStr(c, "cate", "$all")
cates := []string{}
if cate != "$all" {
cates = strings.Split(cate, ",")
}
total, err := models.AlertHisEventTotal(prod, busiGroupId, stime, etime, severity, recovered, clusters, cates, query)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(prod, busiGroupId, stime, etime, severity, recovered, clusters, cates, query, limit, ginx.Offset(c, limit))
list, err := models.AlertHisEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(list); i++ {
list[i].FillNotifyGroups(cache)
list[i].FillNotifyGroups(rt.Ctx, cache)
}
ginx.NewRender(c).Data(gin.H{
......@@ -59,9 +69,9 @@ func alertHisEventsList(c *gin.Context) {
}, nil)
}
func alertHisEventGet(c *gin.Context) {
func (rt *Router) alertHisEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertHisEventGetById(eid)
event, err := models.AlertHisEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
if event == nil {
......
package router
import (
"encoding/json"
"net/http"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/didi/nightingale/v5/src/models"
)
// alertRuleGets loads alert rules through models.AlertRuleGets for the
// business group id taken from the "id" URL parameter and renders the whole
// slice in one response; no paging or filtering happens here, searching and
// paging are left to the front-end.
// NOTE(review): this is a rendered diff, so each changed statement appears
// twice: the old package-level form first, then the new form that is a
// *Router method and passes rt.Ctx into the models layer.
func alertRuleGets(c *gin.Context) {
func (rt *Router) alertRuleGets(c *gin.Context) {
busiGroupId := ginx.UrlParamInt64(c, "id")
ars, err := models.AlertRuleGets(busiGroupId)
ars, err := models.AlertRuleGets(rt.Ctx, busiGroupId)
if err == nil {
// A single map is handed to every FillNotifyGroups call; presumably it lets
// user groups that were already looked up be reused - confirm in models.
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
// NOTE(review): whatever FillNotifyGroups returns is discarded here, while
// alertRuleGet in this same file assigns it to err and checks it.
ars[i].FillNotifyGroups(cache)
ars[i].FillNotifyGroups(rt.Ctx, cache)
}
}
// err is passed straight to the renderer together with the (possibly empty) list.
ginx.NewRender(c).Data(ars, err)
}
func alertRulesGetByService(c *gin.Context) {
func (rt *Router) alertRulesGetByService(c *gin.Context) {
prods := strings.Split(ginx.QueryStr(c, "prods", ""), ",")
query := ginx.QueryStr(c, "query", "")
algorithm := ginx.QueryStr(c, "algorithm", "")
......@@ -37,18 +38,18 @@ func alertRulesGetByService(c *gin.Context) {
}
disabled := ginx.QueryInt(c, "disabled", -1)
ars, err := models.AlertRulesGetsBy(prods, query, algorithm, cluster, cates, disabled)
ars, err := models.AlertRulesGetsBy(rt.Ctx, prods, query, algorithm, cluster, cates, disabled)
if err == nil {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(cache)
ars[i].FillNotifyGroups(rt.Ctx, cache)
}
}
ginx.NewRender(c).Data(ars, err)
}
// single or import
func alertRuleAddByFE(c *gin.Context) {
func (rt *Router) alertRuleAddByFE(c *gin.Context) {
username := c.MustGet("username").(string)
var lst []models.AlertRule
......@@ -60,12 +61,12 @@ func alertRuleAddByFE(c *gin.Context) {
}
bgid := ginx.UrlParamInt64(c, "id")
reterr := alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language"))
reterr := rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language"))
ginx.NewRender(c).Data(reterr, nil)
}
func alertRuleAddByService(c *gin.Context) {
func (rt *Router) alertRuleAddByService(c *gin.Context) {
var lst []models.AlertRule
ginx.BindJSON(c, &lst)
......@@ -73,11 +74,11 @@ func alertRuleAddByService(c *gin.Context) {
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
reterr := alertRuleAddForService(lst, "")
reterr := rt.alertRuleAddForService(lst, "")
ginx.NewRender(c).Data(reterr, nil)
}
func alertRuleAddForService(lst []models.AlertRule, username string) map[string]string {
func (rt *Router) alertRuleAddForService(lst []models.AlertRule, username string) map[string]string {
count := len(lst)
// alert rule name -> error string
reterr := make(map[string]string)
......@@ -93,7 +94,7 @@ func alertRuleAddForService(lst []models.AlertRule, username string) map[string]
continue
}
if err := lst[i].Add(); err != nil {
if err := lst[i].Add(rt.Ctx, models.GetChannelMap(rt.Ctx)); err != nil {
reterr[lst[i].Name] = err.Error()
} else {
reterr[lst[i].Name] = ""
......@@ -102,7 +103,7 @@ func alertRuleAddForService(lst []models.AlertRule, username string) map[string]
return reterr
}
func alertRuleAdd(lst []models.AlertRule, username string, bgid int64, lang string) map[string]string {
func (rt *Router) alertRuleAdd(lst []models.AlertRule, username string, bgid int64, lang string) map[string]string {
count := len(lst)
// alert rule name -> error string
reterr := make(map[string]string)
......@@ -119,7 +120,7 @@ func alertRuleAdd(lst []models.AlertRule, username string, bgid int64, lang stri
continue
}
if err := lst[i].Add(); err != nil {
if err := lst[i].Add(rt.Ctx, models.GetChannelMap(rt.Ctx)); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(lang, err.Error())
} else {
reterr[lst[i].Name] = ""
......@@ -128,28 +129,28 @@ func alertRuleAdd(lst []models.AlertRule, username string, bgid int64, lang stri
return reterr
}
// alertRuleDel binds an idsForm from the JSON request body, runs its Verify
// method, and hands the ids to models.AlertRuleDels together with the "id"
// URL parameter; the result of that call is rendered as the response message.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the models call).
func alertRuleDel(c *gin.Context) {
func (rt *Router) alertRuleDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
// The business-group id from the URL is passed along as a safeguard;
// presumably AlertRuleDels only removes rules that belong to that group -
// confirm in models.
ginx.NewRender(c).Message(models.AlertRuleDels(f.Ids, ginx.UrlParamInt64(c, "id")))
ginx.NewRender(c).Message(models.AlertRuleDels(rt.Ctx, f.Ids, ginx.UrlParamInt64(c, "id")))
}
// alertRuleDelByService binds an idsForm from the JSON request body, runs its
// Verify method, and passes the ids to models.AlertRuleDels, rendering the
// result as the response message.
// Unlike alertRuleDel, no business-group id is supplied to AlertRuleDels here.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the models call).
func alertRuleDelByService(c *gin.Context) {
func (rt *Router) alertRuleDelByService(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
ginx.NewRender(c).Message(models.AlertRuleDels(f.Ids))
ginx.NewRender(c).Message(models.AlertRuleDels(rt.Ctx, f.Ids))
}
func alertRulePutByFE(c *gin.Context) {
func (rt *Router) alertRulePutByFE(c *gin.Context) {
var f models.AlertRule
ginx.BindJSON(c, &f)
arid := ginx.UrlParamInt64(c, "arid")
ar, err := models.AlertRuleGetById(arid)
ar, err := models.AlertRuleGetById(rt.Ctx, arid)
ginx.Dangerous(err)
if ar == nil {
......@@ -157,25 +158,25 @@ func alertRulePutByFE(c *gin.Context) {
return
}
bgrwCheck(c, ar.GroupId)
rt.bgrwCheck(c, ar.GroupId)
f.UpdateBy = c.MustGet("username").(string)
ginx.NewRender(c).Message(ar.Update(f))
ginx.NewRender(c).Message(ar.Update(rt.Ctx, f, models.GetChannelMap(rt.Ctx)))
}
// alertRulePutByService updates one alert rule: the new content is bound from
// the JSON body into f, the stored rule is loaded by the "arid" URL parameter,
// and ar.Update(f) is rendered as the response message. A missing rule yields
// HTTP 404 with "No such AlertRule".
// Compared with alertRulePutByFE in this file, this variant neither calls
// bgrwCheck nor sets f.UpdateBy from the request context.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method; rt.Ctx and models.GetChannelMap(rt.Ctx)
// are added to the Update call).
func alertRulePutByService(c *gin.Context) {
func (rt *Router) alertRulePutByService(c *gin.Context) {
var f models.AlertRule
ginx.BindJSON(c, &f)
arid := ginx.UrlParamInt64(c, "arid")
ar, err := models.AlertRuleGetById(arid)
ar, err := models.AlertRuleGetById(rt.Ctx, arid)
ginx.Dangerous(err)
// A nil rule with a nil error is treated as "not found".
if ar == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such AlertRule")
return
}
ginx.NewRender(c).Message(ar.Update(f))
ginx.NewRender(c).Message(ar.Update(rt.Ctx, f, models.GetChannelMap(rt.Ctx)))
}
type alertRuleFieldForm struct {
......@@ -185,7 +186,7 @@ type alertRuleFieldForm struct {
}
// update one field: cluster note severity disabled prom_eval_interval prom_for_duration notify_channels notify_groups notify_recovered notify_repeat_step callbacks runbook_url append_tags
func alertRulePutFields(c *gin.Context) {
func (rt *Router) alertRulePutFields(c *gin.Context) {
var f alertRuleFieldForm
ginx.BindJSON(c, &f)
......@@ -197,7 +198,7 @@ func alertRulePutFields(c *gin.Context) {
f.Fields["update_at"] = time.Now().Unix()
for i := 0; i < len(f.Ids); i++ {
ar, err := models.AlertRuleGetById(f.Ids[i])
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
ginx.Dangerous(err)
if ar == nil {
......@@ -209,7 +210,7 @@ func alertRulePutFields(c *gin.Context) {
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
if !strings.Contains(ar.Callbacks, callback) {
ginx.Dangerous(ar.UpdateFieldsMap(map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
continue
}
}
......@@ -219,21 +220,29 @@ func alertRulePutFields(c *gin.Context) {
// 删除一个 callback 地址
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
ginx.Dangerous(ar.UpdateFieldsMap(map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
continue
}
}
ginx.Dangerous(ar.UpdateFieldsMap(f.Fields))
for k, v := range f.Fields {
if k == "datasource_ids" {
b, err := json.Marshal(v)
ginx.Dangerous(err)
f.Fields[k] = string(b)
}
}
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, f.Fields))
}
ginx.NewRender(c).Message(nil)
}
func alertRuleGet(c *gin.Context) {
func (rt *Router) alertRuleGet(c *gin.Context) {
arid := ginx.UrlParamInt64(c, "arid")
ar, err := models.AlertRuleGetById(arid)
ar, err := models.AlertRuleGetById(rt.Ctx, arid)
ginx.Dangerous(err)
if ar == nil {
......@@ -241,6 +250,8 @@ func alertRuleGet(c *gin.Context) {
return
}
err = ar.FillNotifyGroups(make(map[int64]*models.UserGroup))
err = ar.FillNotifyGroups(rt.Ctx, make(map[int64]*models.UserGroup))
ginx.Dangerous(err)
ginx.NewRender(c).Data(ar, err)
}
......@@ -4,34 +4,38 @@ import (
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/didi/nightingale/v5/src/models"
)
// alertSubscribeGets loads alert subscriptions through
// models.AlertSubscribeGets for the business group id in the "id" URL
// parameter, enriches each entry, and renders the whole slice; no paging or
// filtering happens here, searching and paging are left to the front-end.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the models calls).
// The FillDatasourceIds loop has no old counterpart, i.e. it is new.
func alertSubscribeGets(c *gin.Context) {
func (rt *Router) alertSubscribeGets(c *gin.Context) {
bgid := ginx.UrlParamInt64(c, "id")
lst, err := models.AlertSubscribeGets(bgid)
lst, err := models.AlertSubscribeGets(rt.Ctx, bgid)
if err == nil {
// Pass 1: fill user groups. One map is shared by all entries; presumably it
// avoids repeated lookups of the same group - confirm in models.
ugcache := make(map[int64]*models.UserGroup)
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillUserGroups(ugcache))
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
}
// Pass 2: fill rule names, again with one map shared by all entries.
rulecache := make(map[int64]string)
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillRuleName(rulecache))
ginx.Dangerous(lst[i].FillRuleName(rt.Ctx, rulecache))
}
// Pass 3: fill datasource ids.
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
}
}
// The load error, if any, is handed to the renderer together with lst.
ginx.NewRender(c).Data(lst, err)
}
func alertSubscribeGet(c *gin.Context) {
func (rt *Router) alertSubscribeGet(c *gin.Context) {
subid := ginx.UrlParamInt64(c, "sid")
sub, err := models.AlertSubscribeGet("id=?", subid)
sub, err := models.AlertSubscribeGet(rt.Ctx, "id=?", subid)
ginx.Dangerous(err)
if sub == nil {
......@@ -40,15 +44,17 @@ func alertSubscribeGet(c *gin.Context) {
}
ugcache := make(map[int64]*models.UserGroup)
ginx.Dangerous(sub.FillUserGroups(ugcache))
ginx.Dangerous(sub.FillUserGroups(rt.Ctx, ugcache))
rulecache := make(map[int64]string)
ginx.Dangerous(sub.FillRuleName(rulecache))
ginx.Dangerous(sub.FillRuleName(rt.Ctx, rulecache))
ginx.Dangerous(sub.FillDatasourceIds(rt.Ctx))
ginx.Dangerous(sub.DB2FE())
ginx.NewRender(c).Data(sub, nil)
}
func alertSubscribeAdd(c *gin.Context) {
func (rt *Router) alertSubscribeAdd(c *gin.Context) {
var f models.AlertSubscribe
ginx.BindJSON(c, &f)
......@@ -61,10 +67,10 @@ func alertSubscribeAdd(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "group_id invalid")
}
ginx.NewRender(c).Message(f.Add())
ginx.NewRender(c).Message(f.Add(rt.Ctx))
}
func alertSubscribePut(c *gin.Context) {
func (rt *Router) alertSubscribePut(c *gin.Context) {
var fs []models.AlertSubscribe
ginx.BindJSON(c, &fs)
......@@ -74,6 +80,7 @@ func alertSubscribePut(c *gin.Context) {
fs[i].UpdateBy = username
fs[i].UpdateAt = timestamp
ginx.Dangerous(fs[i].Update(
rt.Ctx,
"name",
"disabled",
"cluster",
......@@ -86,16 +93,20 @@ func alertSubscribePut(c *gin.Context) {
"user_group_ids",
"update_at",
"update_by",
"webhooks",
"for_duration",
"redefine_webhooks",
"datasource_ids",
))
}
ginx.NewRender(c).Message(nil)
}
// alertSubscribeDel binds an idsForm from the JSON request body, runs its
// Verify method, and passes the ids to models.AlertSubscribeDel, rendering
// the result as the response message.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the models call).
func alertSubscribeDel(c *gin.Context) {
func (rt *Router) alertSubscribeDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
ginx.NewRender(c).Message(models.AlertSubscribeDel(f.Ids))
ginx.NewRender(c).Message(models.AlertSubscribeDel(rt.Ctx, f.Ids))
}
此差异已折叠。
......@@ -3,12 +3,12 @@ package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
)
type busiGroupForm struct {
......@@ -18,7 +18,7 @@ type busiGroupForm struct {
Members []models.BusiGroupMember `json:"members"`
}
func busiGroupAdd(c *gin.Context) {
func (rt *Router) busiGroupAdd(c *gin.Context) {
var f busiGroupForm
ginx.BindJSON(c, &f)
......@@ -39,10 +39,10 @@ func busiGroupAdd(c *gin.Context) {
}
username := c.MustGet("username").(string)
ginx.Dangerous(models.BusiGroupAdd(f.Name, f.LabelEnable, f.LabelValue, f.Members, username))
ginx.Dangerous(models.BusiGroupAdd(rt.Ctx, f.Name, f.LabelEnable, f.LabelValue, f.Members, username))
// 如果创建成功,拿着name去查,应该可以查到
newbg, err := models.BusiGroupGet("name=?", f.Name)
newbg, err := models.BusiGroupGet(rt.Ctx, "name=?", f.Name)
ginx.Dangerous(err)
if newbg == nil {
......@@ -53,16 +53,16 @@ func busiGroupAdd(c *gin.Context) {
ginx.NewRender(c).Data(newbg.Id, nil)
}
// busiGroupPut updates a business group from a busiGroupForm bound from the
// JSON body: Name, LabelEnable and LabelValue are passed to targetbg.Update
// along with the acting username, and the result is rendered as the response
// message.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the Update call).
func busiGroupPut(c *gin.Context) {
func (rt *Router) busiGroupPut(c *gin.Context) {
var f busiGroupForm
ginx.BindJSON(c, &f)
// Both values are read from the gin context rather than the request;
// presumably earlier middleware stored them - confirm in the route setup.
// The unchecked type assertions panic if a value has a different type.
username := c.MustGet("username").(string)
targetbg := c.MustGet("busi_group").(*models.BusiGroup)
ginx.NewRender(c).Message(targetbg.Update(f.Name, f.LabelEnable, f.LabelValue, username))
ginx.NewRender(c).Message(targetbg.Update(rt.Ctx, f.Name, f.LabelEnable, f.LabelValue, username))
}
func busiGroupMemberAdd(c *gin.Context) {
func (rt *Router) busiGroupMemberAdd(c *gin.Context) {
var members []models.BusiGroupMember
ginx.BindJSON(c, &members)
......@@ -75,10 +75,10 @@ func busiGroupMemberAdd(c *gin.Context) {
}
}
ginx.NewRender(c).Message(targetbg.AddMembers(members, username))
ginx.NewRender(c).Message(targetbg.AddMembers(rt.Ctx, members, username))
}
func busiGroupMemberDel(c *gin.Context) {
func (rt *Router) busiGroupMemberDel(c *gin.Context) {
var members []models.BusiGroupMember
ginx.BindJSON(c, &members)
......@@ -91,14 +91,14 @@ func busiGroupMemberDel(c *gin.Context) {
}
}
ginx.NewRender(c).Message(targetbg.DelMembers(members, username))
ginx.NewRender(c).Message(targetbg.DelMembers(rt.Ctx, members, username))
}
func busiGroupDel(c *gin.Context) {
func (rt *Router) busiGroupDel(c *gin.Context) {
username := c.MustGet("username").(string)
targetbg := c.MustGet("busi_group").(*models.BusiGroup)
err := targetbg.Del()
err := targetbg.Del(rt.Ctx)
if err != nil {
logger.Infof("busi_group_delete fail: operator=%s, group_name=%s error=%v", username, targetbg.Name, err)
} else {
......@@ -109,26 +109,29 @@ func busiGroupDel(c *gin.Context) {
}
// busiGroupGets lists business groups for the current user via
// me.BusiGroups, driven by the "limit", "query" and "all" query parameters.
// Original author note: "I am a super admin, or I am a member of the business
// group" - presumably describing which groups BusiGroups returns; confirm in
// models.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the BusiGroups call).
func busiGroupGets(c *gin.Context) {
func (rt *Router) busiGroupGets(c *gin.Context) {
limit := ginx.QueryInt(c, "limit", defaultLimit)
query := ginx.QueryStr(c, "query", "")
all := ginx.QueryBool(c, "all", false)
// The user object comes from the gin context; the unchecked type assertion
// panics if the stored value is not a *models.User.
me := c.MustGet("user").(*models.User)
lst, err := me.BusiGroups(limit, query, all)
lst, err := me.BusiGroups(rt.Ctx, limit, query, all)
// Replace a nil/empty result with an explicit empty slice; presumably so the
// rendered JSON carries [] instead of null - confirm against the renderer.
if len(lst) == 0 {
lst = []models.BusiGroup{}
}
ginx.NewRender(c).Data(lst, err)
}
// busiGroupAlertingsGets is only called from the active-alerts page; it
// fetches the number of active alerts for each business group.
// The "ids" query parameter is converted with str.IdsInt64 and passed to
// models.AlertNumbers, whose result and error are rendered as-is.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the models call).
func busiGroupAlertingsGets(c *gin.Context) {
func (rt *Router) busiGroupAlertingsGets(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
ret, err := models.AlertNumbers(str.IdsInt64(ids))
ret, err := models.AlertNumbers(rt.Ctx, str.IdsInt64(ids))
ginx.NewRender(c).Data(ret, err)
}
// busiGroupGet resolves the business group for the "id" URL parameter with
// the BusiGroup helper, fills in its user groups, and renders the group.
// NOTE(review): rendered diff - here the old signature and its two body lines
// come first, followed by the new *Router method signature and the same two
// lines with rt.Ctx added; the final render line and closing brace are shared.
func busiGroupGet(c *gin.Context) {
bg := BusiGroup(ginx.UrlParamInt64(c, "id"))
ginx.Dangerous(bg.FillUserGroups())
func (rt *Router) busiGroupGet(c *gin.Context) {
// bg is used without a nil check; presumably BusiGroup aborts the request
// itself when the group cannot be found - confirm in its definition.
bg := BusiGroup(rt.Ctx, ginx.UrlParamInt64(c, "id"))
ginx.Dangerous(bg.FillUserGroups(rt.Ctx))
ginx.NewRender(c).Data(bg, nil)
}
......@@ -3,26 +3,26 @@ package router
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
)
// chartShareGets reads the "ids" query parameter, converts it with
// str.IdsInt64 using "," as the separator argument, and renders the result
// of models.ChartShareGetsByIds for those ids.
// NOTE(review): rendered diff - the old line is followed by its replacement
// (handler becomes a *Router method, rt.Ctx is added to the models call).
func chartShareGets(c *gin.Context) {
func (rt *Router) chartShareGets(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
lst, err := models.ChartShareGetsByIds(str.IdsInt64(ids, ","))
lst, err := models.ChartShareGetsByIds(rt.Ctx, str.IdsInt64(ids, ","))
ginx.NewRender(c).Data(lst, err)
}
// chartShareForm is one element of the JSON array accepted by chartShareAdd,
// which copies its fields into a models.ChartShare.
// NOTE(review): rendered diff - the first Configs line is the old single-field
// layout; the DatasourceId/Configs pair after it is the new layout.
type chartShareForm struct {
Configs string `json:"configs"`
// JSON key "datasource_id"; copied to ChartShare.DatasourceId by chartShareAdd.
DatasourceId int64 `json:"datasource_id"`
// JSON key "configs"; stored as an opaque string, its format is not visible here.
Configs string `json:"configs"`
}
func chartShareAdd(c *gin.Context) {
func (rt *Router) chartShareAdd(c *gin.Context) {
username := c.MustGet("username").(string)
cluster := MustGetCluster(c)
var forms []chartShareForm
ginx.BindJSON(c, &forms)
......@@ -32,12 +32,12 @@ func chartShareAdd(c *gin.Context) {
for _, f := range forms {
chart := models.ChartShare{
Cluster: cluster,
Configs: f.Configs,
CreateBy: username,
CreateAt: now,
DatasourceId: f.DatasourceId,
Configs: f.Configs,
CreateBy: username,
CreateAt: now,
}
ginx.Dangerous(chart.Add())
ginx.Dangerous(chart.Add(rt.Ctx))
ids = append(ids, chart.Id)
}
......
此差异已折叠。
package router
// ChartPure carries just two properties of a chart, (de)serialized as JSON
// under the keys "configs" and "weight".
type ChartPure struct {
// Chart configuration kept as an opaque string; its format is not visible here.
Configs string `json:"configs"`
// Integer weight; presumably the chart's ordering position - TODO confirm.
Weight int `json:"weight"`
}
// ChartGroupPure is a named group of ChartPure entries, (de)serialized as JSON
// under the keys "name", "weight" and "charts".
type ChartGroupPure struct {
Name string `json:"name"`
// Integer weight; presumably the group's ordering position - TODO confirm.
Weight int `json:"weight"`
// Charts contained in this group.
Charts []ChartPure `json:"charts"`
}
// DashboardPure is a dashboard together with its chart groups, (de)serialized
// as JSON under the keys "name", "tags", "configs" and "chart_groups".
type DashboardPure struct {
Name string `json:"name"`
// Tags held in a single string; the separator/encoding is not visible here.
Tags string `json:"tags"`
// Dashboard configuration kept as an opaque string; format not visible here.
Configs string `json:"configs"`
// Chart groups that make up the dashboard, each with its own charts.
ChartGroups []ChartGroupPure `json:"chart_groups"`
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册