55 lines
2.2 KiB
YAML
55 lines
2.2 KiB
YAML
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
---
# Alerting rules for the Thanos sidecar, packaged as a prometheus-operator
# PrometheusRule. The whole document is rendered only when
# `nfc_monitoring.thanos.sidecar.enabled` is truthy in the chart values.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: metrics
    app.kubernetes.io/name: thanos
    # Templated values are piped through `quote` so the rendered YAML always
    # carries strings; label values and the namespace must be strings, and an
    # unquoted number- or boolean-looking expansion would be retyped by the
    # YAML parser.
    app.kubernetes.io/part-of: {{ $.Chart.Name | quote }}
    app.kubernetes.io/managed-by: {{ $.Release.Service | quote }}
    app.kubernetes.io/version: {{ $.Chart.Version | quote }}
    # NOTE(review): presumably these two labels are what the Prometheus
    # resource's rule selector matches on - confirm against the Prometheus CR.
    prometheus: k8s
    role: alert-rules
  name: thanos-rules
  namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }}
spec:
  groups:
    - name: thanos-sidecar
      rules:
        # Fires when any sidecar job/instance pair reports object-store bucket
        # operation failures over the last 5 minutes, sustained for 5 minutes.
        - alert: ThanosSidecarBucketOperationsFailed
          annotations:
            # The backtick-quoted template action emits a literal opening
            # brace pair, so Helm leaves the $labels expression in the
            # rendered output instead of evaluating it at chart render time.
            description: Thanos Sidecar {{ `{{` }}$labels.instance}} bucket operations are failing
            runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed
            summary: Thanos Sidecar bucket operations are failing
          expr: |
            sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-sidecar.*"}[5m])) > 0
          for: 5m
          labels:
            severity: critical
        # Fires when the sidecar reports Prometheus as down while a series with
        # the same namespace/pod shows a non-zero TSDB data replay duration.
        - alert: ThanosSidecarNoConnectionToStartedPrometheus
          annotations:
            # Same literal-brace escape as in the first rule's description.
            description: Thanos Sidecar {{ `{{` }}$labels.instance}} is unhealthy.
            runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus
            # Folded scalar: the two source lines are joined with one space.
            summary: >-
              Thanos Sidecar cannot access Prometheus, even though Prometheus seems
              healthy and has reloaded WAL.
          expr: |
            thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0
            AND on (namespace, pod)
            prometheus_tsdb_data_replay_duration_seconds != 0
          for: 5m
          labels:
            severity: critical
        # Fires when no `up` series with a job matching thanos-sidecar has the
        # value 1 for 5 minutes.
        - alert: ThanosSidecarIsDown
          annotations:
            # Folded scalar: the two source lines are joined with one space.
            description: >-
              ThanosSidecar has disappeared. Prometheus target for the component
              cannot be discovered.
            runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown
            summary: Thanos component has disappeared.
          expr: |
            absent(up{job=~".*thanos-sidecar.*"} == 1)
          for: 5m
          labels:
            severity: critical
{{ end }}