diff --git a/CHANGES.md b/CHANGES.md index 35e8874412fad8a6269c747ed1112a2e5f32419a..4e41fc7c4e6001cbe12143c27601cb1ba91a7fae 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -68,6 +68,7 @@ Release Notes. * Optimize the self monitoring grafana dashboard. * Enhance the export service. * Add function `retagByK8sMeta` and opt type `K8sRetagType.Pod2Service` in MAL for k8s to relate pods and services. +* Support k8s monitoring. * Make the flushing metrics operation concurrent. * Fix ALS K8SServiceRegistry didn't remove the correct entry. * Using "service.istio.io/canonical-name" to replace "app" label to resolve Envoy ALS service name. diff --git a/docs/en/setup/backend/backend-receivers.md b/docs/en/setup/backend/backend-receivers.md index 4512aed053b874c2bb48c044a437eb011bb57256..b02b14b8135b6a53773ab40185d62880baa8a0ce 100644 --- a/docs/en/setup/backend/backend-receivers.md +++ b/docs/en/setup/backend/backend-receivers.md @@ -132,6 +132,9 @@ to be the identification of the metric data. 
|istio-controlplane| Metrics of Istio control panel | otel-oc-rules/istio-controlplane.yaml | Istio Control Panel -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server | |oap| Metrics of SkyWalking OAP server itself | otel-oc-rules/oap.yaml | SkyWalking OAP Server(SelfObservability) -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server | |vm| Metrics of VMs | otel-oc-rules/vm.yaml | Prometheus node-exporter(VMs) -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server | +|k8s-cluster| Metrics of K8s cluster | otel-oc-rules/k8s-cluster.yaml | K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server | +|k8s-node| Metrics of K8s nodes | otel-oc-rules/k8s-node.yaml | cAdvisor & K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server | +|k8s-service| Metrics of K8s services | otel-oc-rules/k8s-service.yaml | cAdvisor & K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server | ## Meter receiver diff --git a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java index 5b179f69ca977f61e2782d4da150b32dfcfbfd46..df12ed667fefd182f4d61c580f5798e8ea79df7d 100644 --- a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java +++ b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java @@ -27,7 +27,6 @@ import org.apache.skywalking.oap.meter.analyzer.dsl.Sample; import org.apache.skywalking.oap.meter.analyzer.k8s.K8sInfoRegistry; public enum K8sRetagType implements Retag { - Pod2Service { @Override public Sample[] execute(final Sample[] ss, @@ -39,11 +38,12 @@ public enum K8sRetagType implements Retag { String namespace = 
sample.getLabels().get(namespaceLabelName); if (!Strings.isNullOrEmpty(podName) && !Strings.isNullOrEmpty(namespace)) { String serviceName = K8sInfoRegistry.getInstance().findServiceName(namespace, podName); - if (!Strings.isNullOrEmpty(serviceName)) { - Map labels = Maps.newHashMap(sample.getLabels()); - labels.put(newLabelName, serviceName); - return sample.toBuilder().labels(ImmutableMap.copyOf(labels)).build(); + if (Strings.isNullOrEmpty(serviceName)) { + serviceName = BLANK; } + Map labels = Maps.newHashMap(sample.getLabels()); + labels.put(newLabelName, serviceName); + return sample.toBuilder().labels(ImmutableMap.copyOf(labels)).build(); } return sample; }).toArray(Sample[]::new); diff --git a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java index e02a9713c0a2b3b80589c2adc7c591a21a1ad8ad..070b9c31dcb5b5cc51b419fa6f882d69380af569 100644 --- a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java +++ b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java @@ -21,5 +21,6 @@ package org.apache.skywalking.oap.meter.analyzer.dsl.tagOpt; import org.apache.skywalking.oap.meter.analyzer.dsl.Sample; public interface Retag { + String BLANK = ""; Sample[] execute(Sample[] ss, String newLabelName, String existingLabelName, String namespaceLabelName); } diff --git a/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java b/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java index fbdf1adcc3234884e774e4c42e8658ce913039c4..7a4e75ea3c836765b913646b4ccb15156912dbb7 100644 --- 
a/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java +++ b/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java @@ -28,6 +28,7 @@ import java.util.Collection; import java.util.Map; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.skywalking.oap.meter.analyzer.dsl.tagOpt.Retag; import org.apache.skywalking.oap.meter.analyzer.k8s.K8sInfoRegistry; import org.junit.Before; import org.junit.Test; @@ -133,7 +134,7 @@ public class K8sTagTest { .labels( of( "namespace", "default", "container", "my-nginx", "cpu", "total", "pod", - "my-nginx-5dc4865748-no-pod" + "my-nginx-5dc4865748-no-pod" , "service", Retag.BLANK )) .value(2) .build(), @@ -175,7 +176,7 @@ public class K8sTagTest { .labels( of( "namespace", "default", "container", "my-nginx", "cpu", "total", "pod", - "my-nginx-5dc4865748-no-service" + "my-nginx-5dc4865748-no-service" , "service", Retag.BLANK )) .value(2) .build(), diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3ed97c266eb08b2418613510c25de9b21096b46 --- /dev/null +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This will parse a textual representation of a duration. The formats +# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS} +# with days considered to be exactly 24 hours. +#

+# Examples: +#

+#    "PT20.345S" -- parses as "20.345 seconds"
+#    "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
+#    "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
+#    "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+#    "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
+#    "P-6H3M"    -- parses as "-6 hours and +3 minutes"
+#    "-P6H3M"    -- parses as "-6 hours and -3 minutes"
+#    "-P-6H+3M"  -- parses as "+6 hours and -3 minutes"
+# 
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).service(['cluster']) +metricPrefix: k8s_cluster +metricsRules: + + + - name: cpu_cores + exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster']) + - name: cpu_cores_allocatable + exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster']) + - name: cpu_cores_requests + exp: (kube_pod_container_resource_requests * 1000).tagEqual('resource' , 'cpu').sum(['cluster']) + - name: cpu_cores_limits + exp: (kube_pod_container_resource_limits * 1000).tagEqual('resource' , 'cpu').sum(['cluster']) + + - name: memory_total + exp: kube_node_status_capacity.tagEqual('resource' , 'memory').sum(['cluster']) + - name: memory_allocatable + exp: kube_node_status_allocatable.tagEqual('resource' , 'memory').sum(['cluster']) + - name: memory_requests + exp: kube_pod_container_resource_requests.tagEqual('resource' , 'memory').sum(['cluster']) + - name: memory_limits + exp: kube_pod_container_resource_limits.tagEqual('resource' , 'memory').sum(['cluster']) + + - name: storage_total + exp: kube_node_status_capacity.tagEqual('resource' , 'ephemeral_storage').sum(['cluster']) + - name: storage_allocatable + exp: kube_node_status_allocatable.tagEqual('resource' , 'ephemeral_storage').sum(['cluster']) + + - name: node_total + exp: kube_node_info.sum(['cluster']) + - name: node_status + exp: kube_node_status_condition.valueEqual(1).tagMatch('status' , 'true|unknown').sum(['cluster' , 'node' ,'condition']) + + - name: namespace_total + exp: kube_namespace_labels.sum(['cluster']) + + - name: deployment_total + exp: kube_deployment_labels.sum(['cluster']) + - name: deployment_status + exp: kube_deployment_status_condition.valueEqual(1).tagMatch('condition' , 'Available').sum(['cluster' , 'deployment' ,'condition' , 'status']).tag({tags -> tags.remove('condition')}) + - name: deployment_spec_replicas + exp: kube_deployment_spec_replicas.sum(['cluster' , 'deployment']) + 
+ - name: service_total + exp: kube_service_info.sum(['cluster']) + - name: service_pod_status + exp: kube_pod_status_phase.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'phase']) + + - name: pod_total + exp: kube_pod_info.sum(['cluster']) + - name: pod_status_not_running + exp: kube_pod_status_phase.valueEqual(1).tagNotMatch('phase' , 'Running').sum(['cluster' , 'pod' , 'phase']) + + - name: container_total + exp: kube_pod_container_info.sum(['cluster']) + - name: pod_status_waiting + exp: kube_pod_container_status_waiting_reason.valueEqual(1).sum(['cluster' , 'pod' , 'container' , 'reason']) + - name: pod_status_terminated + exp: kube_pod_container_status_terminated_reason.valueEqual(1).sum(['cluster' , 'pod' , 'container' , 'reason']) diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f32102a50dcbf4975ab0a82dc720ea55fa42054 --- /dev/null +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# This will parse a textual representation of a duration. The formats +# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS} +# with days considered to be exactly 24 hours. +#

+# Examples: +#

+#    "PT20.345S" -- parses as "20.345 seconds"
+#    "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
+#    "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
+#    "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+#    "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
+#    "P-6H3M"    -- parses as "-6 hours and +3 minutes"
+#    "-P6H3M"    -- parses as "-6 hours and -3 minutes"
+#    "-P-6H+3M"  -- parses as "+6 hours and -3 minutes"
+# 
+ +expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).instance(['cluster'] , ['node']) +metricPrefix: k8s_node +metricsRules: + + - name: cpu_cores + exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) + - name: cpu_usage + exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).rate('PT1M') + - name: cpu_cores_allocatable + exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) + - name: cpu_cores_requests + exp: (kube_pod_container_resource_requests * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) + - name: cpu_cores_limits + exp: (kube_pod_container_resource_limits * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node']) + + - name: memory_total + exp: kube_node_status_capacity.tagEqual('resource' , 'memory').sum(['cluster' , 'node']) + - name: memory_allocatable + exp: kube_node_status_allocatable.tagEqual('resource' , 'memory').sum(['cluster' , 'node']) + - name: memory_requests + exp: kube_pod_container_resource_requests.tagEqual('resource' , 'memory').sum(['cluster' , 'node']) + - name: memory_limits + exp: kube_pod_container_resource_limits.tagEqual('resource' , 'memory').sum(['cluster' , 'node']) + + - name: memory_usage + exp: container_memory_working_set_bytes.tagEqual('id' , '/').sum(['cluster' , 'node']) + + + - name: storage_total + exp: kube_node_status_capacity.tagEqual('resource' , 'ephemeral_storage').sum(['cluster' , 'node']) + - name: storage_allocatable + exp: kube_node_status_allocatable.tagEqual('resource' , 'ephemeral_storage').sum(['cluster' , 'node']) + + - name: node_status + exp: kube_node_status_condition.valueEqual(1).tagMatch('status' , 'true|unknown').sum(['cluster' , 'node' ,'condition']) + + - name: pod_total + exp: kube_pod_info.sum(['cluster' , 'node']) + + - name: network_receive + exp: container_network_receive_bytes_total.sum(['cluster' , 'node']).irate() + - 
name: network_transmit + exp: container_network_transmit_bytes_total.sum(['cluster' , 'node']).irate() diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79b86c24609398aea021662208a1efa5807bf835 --- /dev/null +++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This will parse a textual representation of a duration. The formats +# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS} +# with days considered to be exactly 24 hours. +#

+# Examples: +#

+#    "PT20.345S" -- parses as "20.345 seconds"
+#    "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
+#    "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
+#    "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+#    "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
+#    "P-6H3M"    -- parses as "-6 hours and +3 minutes"
+#    "-P6H3M"    -- parses as "-6 hours and -3 minutes"
+#    "-P-6H+3M"  -- parses as "+6 hours and -3 minutes"
+# 
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).endpoint(['cluster'] , ['service']) +metricPrefix: k8s_service +metricsRules: + + - name: pod_total + exp: kube_pod_info.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service']) + + - name: cpu_cores_requests + exp: (kube_pod_container_resource_requests * 1000).retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'cpu').sum(['cluster' , 'service']) + - name: cpu_cores_limits + exp: (kube_pod_container_resource_limits * 1000).retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'cpu').sum(['cluster' , 'service']) + - name: memory_requests + exp: kube_pod_container_resource_requests.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'memory').sum(['cluster' , 'service']) + - name: memory_limits + exp: kube_pod_container_resource_limits.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'memory').sum(['cluster' , 'service']) + + - name: pod_status + exp: kube_pod_status_phase.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'phase']) + - name: pod_status_waiting + exp: kube_pod_container_status_waiting_reason.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'container' , 'reason']) + - name: pod_status_terminated + exp: kube_pod_container_status_terminated_reason.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'container' , 
'reason']) + - name: pod_status_restarts_total + exp: kube_pod_container_status_restarts_total.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) + + - name: pod_cpu_usage + exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).rate('PT1M') + - name: pod_memory_usage + exp: container_memory_working_set_bytes.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) + + - name: pod_network_receive + exp: container_network_receive_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() + - name: pod_network_transmit + exp: container_network_transmit_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate() + - name: pod_fs_usage + exp: container_fs_usage_bytes.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']) diff --git a/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml new file mode 100644 index 0000000000000000000000000000000000000000..9745d0d33215c67e9f923f25ff5fa210db403320 --- /dev/null +++ b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml @@ -0,0 +1,512 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# UI templates initialized file includes the default template when the SkyWalking OAP starts up at the first time. +# +# Also, SkyWalking would detect the existing templates in the database, once they are missing, all templates in this file +# could be added automatically. + +templates: + - name: "K8s" + # The type includes DASHBOARD, TOPOLOGY_INSTANCE, TOPOLOGY_ENDPOINT. + # DASHBOARD type templates could have multiple definitions, by using different names. + # TOPOLOGY_INSTANCE, TOPOLOGY_ENDPOINT type templates should be defined once, as they are used in the topology page only. + type: "DASHBOARD" + # Configuration could be defined through UI, and use `export` to format in the standard JSON. 
+ configuration: |- + [ + { + "name": "K8s", + "type": "service", + "serviceGroup": "k8s-cluster", + "children": [ + { + "name": "Cluster", + "children": [ + { + "width": "2", + "title": "Node Total", + "height": "100", + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_node_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": "2", + "title": "Namespace Total", + "height": "100", + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_namespace_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": "2", + "title": "Deployment Total", + "height": "100", + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_deployment_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": "2", + "title": "Service Total", + "height": "100", + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_service_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": "2", + "title": "Pod Total", + "height": "100", + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_pod_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": 2, + "title": "Container Total", + "height": "100", + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_container_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": "4", + "title": "CPU Resources", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": 
"k8s_cluster_cpu_cores,k8s_cluster_cpu_cores_requests,k8s_cluster_cpu_cores_limits,k8s_cluster_cpu_cores_allocatable", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "unit": "m" + }, + { + "width": "4", + "title": "Memory Resources", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_memory_total,k8s_cluster_memory_requests,k8s_cluster_memory_limits,k8s_cluster_memory_allocatable", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "aggregation": "/", + "aggregationNum": "1073741824", + "unit": "GB" + }, + { + "width": "4", + "title": "Storage Resources", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_cluster_storage_total,k8s_cluster_storage_allocatable", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "aggregation": "/", + "aggregationNum": "1073741824", + "unit": "GB" + }, + { + "width": "4", + "title": "Node Status", + "height": "200", + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_cluster_node_status", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Status-Node", + "showTableValues": "false" + }, + { + "width": "4", + "title": "Deployment Status", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_cluster_deployment_status", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Deployment-Available", + "showTableValues": "false" + }, + { + "width": "4", + "title": "Deployment Spec Replicas", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + 
"metricName": "k8s_cluster_deployment_spec_replicas", + "showTableValues": "true", + "tableHeaderCol1": "Deployment", + "tableHeaderCol2": "Replicas" + }, + { + "width": "4", + "title": "Service Status", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_cluster_service_pod_status", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "showTableValues": "false", + "tableHeaderCol1": "Status-Service" + }, + { + "width": "4", + "title": "Pod Status Not Running", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_cluster_pod_status_not_running", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Status-Pod" + }, + { + "width": "4", + "title": "Pod Status Waiting", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_cluster_pod_status_waiting", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Container-Pod-Waiting Reason", + "showTableValues": "false" + }, + { + "width": "4", + "title": "Pod Status Terminated", + "height": 200, + "entityType": "Service", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_cluster_pod_status_terminated", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartBar" + } + ] + }, + { + "name": "Node", + "children": [ + { + "width": "3", + "title": "Pod Total", + "height": 350, + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_node_pod_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": "3", + "title": "Node Status", + "height": 350, + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": 
"LABELED_VALUE", + "metricName": "k8s_node_node_status", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Status", + "showTableValues": "false" + }, + { + "width": "3", + "title": "CPU Resources", + "height": "350", + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_node_cpu_cores,k8s_node_cpu_cores_allocatable,k8s_node_cpu_cores_requests,k8s_node_cpu_cores_limits", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "unit": "m" + }, + { + "width": "3", + "title": "Memory Resources", + "height": "350", + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "unit": "GB", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "metricName": "k8s_node_memory_total,k8s_node_memory_allocatable,k8s_node_memory_requests,k8s_node_memory_limits", + "aggregation": "/", + "aggregationNum": "1073741824" + }, + { + "width": "3", + "title": "Storage Resources", + "height": 350, + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "unit": "GB", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "metricName": "k8s_node_storage_total,k8s_node_storage_allocatable", + "aggregation": "/", + "aggregationNum": "1073741824" + }, + { + "width": 3, + "title": "CPU Usage", + "height": 350, + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_node_cpu_usage", + "queryMetricType": "readMetricsValues", + "chartType": "ChartLine", + "unit": "m" + }, + { + "width": 3, + "title": "Memory Usage", + "height": 350, + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_node_memory_usage", + "queryMetricType": "readMetricsValues", + "chartType": "ChartLine", + "aggregation": "/", + "aggregationNum": 
"1073741824", + "unit": "GB" + }, + { + "width": "3", + "title": "Network I/O", + "height": 350, + "entityType": "ServiceInstance", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_node_network_receive,k8s_node_network_transmit", + "queryMetricType": "readMetricsValues", + "chartType": "ChartLine", + "unit": "KB/s", + "aggregation": "/", + "aggregationNum": "1024" + } + ] + }, + { + "name": "Service", + "children": [ + { + "width": 3, + "title": "Service Pod Total", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_service_pod_total", + "queryMetricType": "readMetricsValue", + "chartType": "ChartNum" + }, + { + "width": 3, + "title": "Service Pod Status", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_status", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Status-Pod", + "showTableValues": "false" + }, + { + "width": 3, + "title": "Service CPU Resources", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_service_cpu_cores_requests,k8s_service_cpu_cores_limits", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "unit": "m" + }, + { + "width": 3, + "title": "Service Memory Resources", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "REGULAR_VALUE", + "metricName": "k8s_service_memory_requests,k8s_service_memory_limits", + "queryMetricType": "readMetricsValues", + "chartType": "ChartArea", + "aggregation": "/", + "aggregationNum": "1048576", + "unit": "MB" + }, + { + "width": 3, + "title": "Pod CPU Usage", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": 
"k8s_service_pod_cpu_usage", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartLine", + "unit": "m" + }, + { + "width": 3, + "title": "Pod Memory Usage", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_memory_usage", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartLine", + "aggregation": "/", + "aggregationNum": "1048576", + "unit": "MB" + }, + { + "width": 3, + "title": "Pod Waiting", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_status_waiting", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "showTableValues": "false", + "tableHeaderCol1": "Container-Pod-Waiting Reason" + }, + { + "width": 3, + "title": "Pod Terminated", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_status_terminated", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartBar" + }, + { + "width": 3, + "title": "Pod Restarts", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_status_restarts_total", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartTable", + "tableHeaderCol1": "Pod", + "showTableValues": "true", + "tableHeaderCol2": "Restarts Total" + }, + { + "width": 3, + "title": "Pod Network Receive", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_network_receive", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartLine", + "aggregation": "/", + "aggregationNum": "1024", + "unit": "KB/s" + }, + { + "width": 3, + "title": "Pod Network Transmit", + "height": "248", + "entityType": 
"Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_network_transmit", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartLine", + "aggregationNum": "1024", + "aggregation": "/", + "unit": "KB/s" + }, + { + "width": 3, + "title": "Pod Storage Usage", + "height": "248", + "entityType": "Endpoint", + "independentSelector": false, + "metricType": "LABELED_VALUE", + "metricName": "k8s_service_pod_fs_usage", + "queryMetricType": "readLabeledMetricsValues", + "chartType": "ChartArea", + "aggregation": "/", + "aggregationNum": "1048576", + "unit": "MB" + } + ] + } + ] + } + ] + # Activated as the DASHBOARD type, makes this templates added into the UI page automatically. + # False means providing a basic template, user needs to add it manually. + activated: true + # True means wouldn't show up on the dashboard. Only keeps the definition in the storage. + disabled: false