feat(thanos): prometheus rules added for sidecar

!4
This commit is contained in:
2023-09-28 02:04:57 +09:30
parent 014156eb05
commit be0161876d

View File

@ -0,0 +1,55 @@
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: metrics
app.kubernetes.io/name: thanos
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
prometheus: k8s
role: alert-rules
name: thanos-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
spec:
groups:
- name: thanos-sidecar
rules:
- alert: ThanosSidecarBucketOperationsFailed
annotations:
description: Thanos Sidecar {{ `{{` }}$labels.instance}} bucket operations are failing
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed
summary: Thanos Sidecar bucket operations are failing
expr: |
sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-sidecar.*"}[5m])) > 0
for: 5m
labels:
severity: critical
- alert: ThanosSidecarNoConnectionToStartedPrometheus
annotations:
description: Thanos Sidecar {{ `{{` }}$labels.instance}} is unhealthy.
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus
summary: Thanos Sidecar cannot access Prometheus, even though Prometheus seems
healthy and has reloaded WAL.
expr: |
thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0
AND on (namespace, pod)
prometheus_tsdb_data_replay_duration_seconds != 0
for: 5m
labels:
severity: critical
- alert: ThanosSidecarIsDown
annotations:
description: ThanosSidecar has disappeared. Prometheus target for the component
cannot be discovered.
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown
summary: Thanos component has disappeared.
expr: |
absent(up{job=~".*thanos-sidecar.*"} == 1)
for: 5m
labels:
severity: critical
{{ end }}