diff --git a/templates/PrometheusRule-thanos.yaml b/templates/PrometheusRule-thanos.yaml new file mode 100644 index 0000000..94a1402 --- /dev/null +++ b/templates/PrometheusRule-thanos.yaml @@ -0,0 +1,55 @@ +{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/name: thanos + app.kubernetes.io/part-of: {{ $.Chart.Name }} + app.kubernetes.io/managed-by: {{ $.Release.Service }} + app.kubernetes.io/version: {{ $.Chart.Version }} + prometheus: k8s + role: alert-rules + name: thanos-rules + namespace: {{ .Values.nfc_monitoring.prometheus.namespace }} +spec: + groups: + - name: thanos-sidecar + rules: + - alert: ThanosSidecarBucketOperationsFailed + annotations: + description: Thanos Sidecar {{ `{{` }}$labels.instance}} bucket operations are failing + runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed + summary: Thanos Sidecar bucket operations are failing + expr: | + sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-sidecar.*"}[5m])) > 0 + for: 5m + labels: + severity: critical + - alert: ThanosSidecarNoConnectionToStartedPrometheus + annotations: + description: Thanos Sidecar {{ `{{` }}$labels.instance}} is unhealthy. + runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus + summary: Thanos Sidecar cannot access Prometheus, even though Prometheus seems + healthy and has reloaded WAL. + expr: | + thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0 + AND on (namespace, pod) + prometheus_tsdb_data_replay_duration_seconds != 0 + for: 5m + labels: + severity: critical + - alert: ThanosSidecarIsDown + annotations: + description: ThanosSidecar has disappeared. Prometheus target for the component + cannot be discovered. + runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown + summary: Thanos component has disappeared. + expr: | + absent(up{job=~".*thanos-sidecar.*"} == 1) + for: 5m + labels: + severity: critical + +{{ end }} \ No newline at end of file