55
templates/PrometheusRule-thanos.yaml
Normal file
55
templates/PrometheusRule-thanos.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
---
# PrometheusRule holding the alerting rules for the Thanos sidecar.
# The whole manifest is rendered only when
# .Values.nfc_monitoring.thanos.sidecar.enabled is truthy.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: metrics
    app.kubernetes.io/name: thanos
    # Templated values are quoted so the rendered YAML always yields strings,
    # whatever the chart name, version or release service expands to.
    app.kubernetes.io/part-of: {{ $.Chart.Name | quote }}
    app.kubernetes.io/managed-by: {{ $.Release.Service | quote }}
    app.kubernetes.io/version: {{ $.Chart.Version | quote }}
    # NOTE(review): presumably these two labels are what the Prometheus
    # ruleSelector matches on - confirm against the Prometheus resource.
    prometheus: k8s
    role: alert-rules
  name: thanos-rules
  # Quoted for the same reason as the labels above (e.g. an all-digit name).
  namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }}
spec:
  groups:
    - name: thanos-sidecar
      rules:
        # Fires when the per-(job, instance) rate of failed bucket operations
        # over 5m stays above zero for 5 minutes.
        # In the description, the backtick-quoted template action emits a
        # literal pair of opening braces, so the $labels.instance placeholder
        # is left in the rendered manifest instead of being evaluated by Helm.
        - alert: ThanosSidecarBucketOperationsFailed
          annotations:
            description: Thanos Sidecar {{ `{{` }}$labels.instance}} bucket operations are failing
            runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed
            summary: Thanos Sidecar bucket operations are failing
          expr: |
            sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-sidecar.*"}[5m])) > 0
          for: 5m
          labels:
            severity: critical
        # Fires when the sidecar reports Prometheus as down (== 0) while the
        # same (namespace, pod) exposes a non-zero TSDB data replay duration.
        - alert: ThanosSidecarNoConnectionToStartedPrometheus
          annotations:
            description: Thanos Sidecar {{ `{{` }}$labels.instance}} is unhealthy.
            runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus
            summary: Thanos Sidecar cannot access Prometheus, even though Prometheus seems
              healthy and has reloaded WAL.
          expr: |
            thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0
            AND on (namespace, pod)
            prometheus_tsdb_data_replay_duration_seconds != 0
          for: 5m
          labels:
            severity: critical
        # Fires when no `up` series with value 1 exists for any job matching
        # the thanos-sidecar pattern for 5 minutes.
        - alert: ThanosSidecarIsDown
          annotations:
            description: ThanosSidecar has disappeared. Prometheus target for the component
              cannot be discovered.
            runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown
            summary: Thanos component has disappeared.
          expr: |
            absent(up{job=~".*thanos-sidecar.*"} == 1)
          for: 5m
          labels:
            severity: critical

{{ end }}
|
Reference in New Issue
Block a user