未验证 提交 68b918e3 编写于 作者: Z Zhenxu 提交者: GitHub

Events can be configured as alarm source (#7028)

上级 1a727585
......@@ -54,6 +54,7 @@ Release Notes.
* Include events of the entity(s) in the alarm.
* Support `native-json` format log in kafka-fetcher-plugin.
* Fix counter misuse in the alarm core. Alarm can't be triggered in time.
* Events can be configured as alarm source.
#### UI
* Add logo for kong plugin.
......
......@@ -55,10 +55,56 @@ The end time of the event. This field may be empty if the event has not ended ye
**NOTE:** When reporting an event, you typically call the report function twice: the first time for the start of the event and the second time for its end, both with the same UUID.
There are also cases where you would already have both the start time and end time. For example, when exporting events from a third-party system, the start time and end time are already known so you may simply call the report function once.
## How to Configure Alarms for Events
Events are derived from metrics, and can be the source to trigger alarms. For example, if a specific event occurs a
certain number of times within a period, alarms can be triggered and sent.
Every event has a default `value = 1`. When `n` events with the same name are reported, they are aggregated
into `value = n` as follows.
```
Event{name=Unhealthy, source={service=A,instance=a}, ...}
Event{name=Unhealthy, source={service=A,instance=a}, ...}
Event{name=Unhealthy, source={service=A,instance=a}, ...}
Event{name=Unhealthy, source={service=A,instance=a}, ...}
Event{name=Unhealthy, source={service=A,instance=a}, ...}
Event{name=Unhealthy, source={service=A,instance=a}, ...}
```
will be aggregated into
```
Event{name=Unhealthy, source={service=A,instance=a}, ...} <value = 6>
```
so you can configure the following alarm rule to trigger an alarm when the `Unhealthy` event occurs more than 5 times
within 10 minutes.
```yaml
rules:
unhealthy_event_rule:
metrics-name: Unhealthy
# A health check is usually a scheduled task,
# the target may be unhealthy for the first few checks,
# and may occasionally be unhealthy due to network jitter,
# so please adjust the threshold as per your actual situation.
threshold: 5
op: ">"
period: 10
count: 1
message: Service instance has been unhealthy for 10 minutes
```
For more alarm configuration details, please refer to the [alarm doc](../setup/backend/backend-alarm.md).
**Note** that the `Unhealthy` event above is only for demonstration; it is not detected by default in SkyWalking.
However, you can use the methods in [How to Report Events](#how-to-report-events) to report this kind of event.
## Known Events
| Name | Type | When |
| :----: | :----: | :-----|
| Start | Normal | When your Java Application starts with SkyWalking Agent installed, the `Start` Event will be created. |
| Shutdown | Normal | When your Java Application stops with SkyWalking Agent installed, the `Shutdown` Event will be created. |
| Alarm | Error | When the Alarm is triggered, the corresponding `Alarm` Event will be created. |
\ No newline at end of file
| Alarm | Error | When the Alarm is triggered, the corresponding `Alarm` Event will be created. |
......@@ -40,6 +40,19 @@ rules:
count: 1
tags:
level: WARNING
# unhealthy_event_rule:
# metrics-name: Unhealthy
# A health check is usually a scheduled task,
# the target may be unhealthy for the first few checks,
# and may occasionally be unhealthy due to network jitter,
# so please adjust the threshold as per your actual situation.
# threshold: 5
# op: ">"
# period: 10
# count: 1
# message: Service instance has been unhealthy for 10 minutes
# tags:
# level: ERROR
webhooks:
# - http://127.0.0.1/notify/
......
......@@ -24,15 +24,21 @@ import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.skywalking.apm.util.StringUtil;
import org.apache.skywalking.oap.server.core.analysis.IDManager;
import org.apache.skywalking.oap.server.core.analysis.MetricsExtension;
import org.apache.skywalking.oap.server.core.analysis.Stream;
import org.apache.skywalking.oap.server.core.analysis.TimeBucket;
import org.apache.skywalking.oap.server.core.analysis.metrics.LongValueHolder;
import org.apache.skywalking.oap.server.core.analysis.metrics.Metrics;
import org.apache.skywalking.oap.server.core.analysis.metrics.MetricsMetaInfo;
import org.apache.skywalking.oap.server.core.analysis.metrics.WithMetadata;
import org.apache.skywalking.oap.server.core.analysis.worker.MetricsStreamProcessor;
import org.apache.skywalking.oap.server.core.remote.grpc.proto.RemoteData;
import org.apache.skywalking.oap.server.core.source.DefaultScopeDefine;
import org.apache.skywalking.oap.server.core.source.ScopeDeclaration;
import org.apache.skywalking.oap.server.core.storage.StorageHashMapBuilder;
import org.apache.skywalking.oap.server.core.storage.annotation.Column;
import org.elasticsearch.common.Strings;
import static org.apache.skywalking.oap.server.core.source.DefaultScopeDefine.EVENT;
......@@ -45,7 +51,7 @@ import static org.apache.skywalking.oap.server.core.source.DefaultScopeDefine.EV
of = "uuid"
)
@MetricsExtension(supportDownSampling = false, supportUpdate = true)
public class Event extends Metrics {
public class Event extends Metrics implements WithMetadata, LongValueHolder {
public static final String INDEX_NAME = "events";
......@@ -104,10 +110,14 @@ public class Event extends Metrics {
@Column(columnName = END_TIME)
private long endTime;
private transient long value = 1;
@Override
public boolean combine(final Metrics metrics) {
final Event event = (Event) metrics;
value++;
// Set time bucket only when it's never set.
if (getTimeBucket() <= 0) {
if (event.getStartTime() > 0) {
......@@ -193,6 +203,21 @@ public class Event extends Metrics {
return hashCode();
}
/**
 * Builds the metadata that identifies the entity this event is attached to.
 *
 * <p>The most specific populated source field decides the scope, checked in this order:
 * <ol>
 *   <li>a non-empty service instance name yields {@code SERVICE_INSTANCE} scope;</li>
 *   <li>otherwise a non-empty endpoint name yields {@code ENDPOINT} scope;</li>
 *   <li>otherwise the scope is {@code SERVICE} and the id is the service id itself.</li>
 * </ol>
 * When both a service instance and an endpoint are set, the service instance takes precedence.
 *
 * @return the meta info carrying the event name, the resolved scope and the entity id
 */
@Override
public MetricsMetaInfo getMeta() {
    // Instance and endpoint ids are both derived from the service id, so build it once up front.
    final String serviceId = IDManager.ServiceID.buildId(getService(), true);

    final String instanceName = getServiceInstance();
    if (!Strings.isNullOrEmpty(instanceName)) {
        final String instanceId = IDManager.ServiceInstanceID.buildId(serviceId, instanceName);
        return new MetricsMetaInfo(getName(), DefaultScopeDefine.SERVICE_INSTANCE, instanceId);
    }

    final String endpointName = getEndpoint();
    if (!Strings.isNullOrEmpty(endpointName)) {
        final String endpointId = IDManager.EndpointID.buildId(serviceId, endpointName);
        return new MetricsMetaInfo(getName(), DefaultScopeDefine.ENDPOINT, endpointId);
    }

    // Neither an instance nor an endpoint is present: the event belongs to the service as a whole.
    return new MetricsMetaInfo(getName(), DefaultScopeDefine.SERVICE, serviceId);
}
public static class Builder implements StorageHashMapBuilder<Event> {
@Override
public Map<String, Object> entity2Storage(Event storageData) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册