55
									
								
								templates/PrometheusRule-thanos.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								templates/PrometheusRule-thanos.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,55 @@ | ||||
| {{ if .Values.nfc_monitoring.thanos.sidecar.enabled }} | ||||
| --- | ||||
| apiVersion: monitoring.coreos.com/v1 | ||||
| kind: PrometheusRule | ||||
| metadata: | ||||
|   labels: | ||||
|     app.kubernetes.io/component: metrics | ||||
|     app.kubernetes.io/name: thanos | ||||
|     app.kubernetes.io/part-of: {{  $.Chart.Name }} | ||||
|     app.kubernetes.io/managed-by: {{  $.Release.Service }} | ||||
|     app.kubernetes.io/version: {{  $.Chart.Version }} | ||||
|     prometheus: k8s | ||||
|     role: alert-rules | ||||
|   name: thanos-rules | ||||
|   namespace: {{  .Values.nfc_monitoring.prometheus.namespace }} | ||||
| spec: | ||||
|   groups: | ||||
|     - name: thanos-sidecar | ||||
|       rules: | ||||
|       - alert: ThanosSidecarBucketOperationsFailed | ||||
|         annotations: | ||||
|           description: Thanos Sidecar {{ `{{` }}$labels.instance}} bucket operations are failing | ||||
|           runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed | ||||
|           summary: Thanos Sidecar bucket operations are failing | ||||
|         expr: | | ||||
|           sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-sidecar.*"}[5m])) > 0 | ||||
|         for: 5m | ||||
|         labels: | ||||
|           severity: critical | ||||
|       - alert: ThanosSidecarNoConnectionToStartedPrometheus | ||||
|         annotations: | ||||
|           description: Thanos Sidecar {{ `{{` }}$labels.instance}} is unhealthy. | ||||
|           runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus | ||||
|           summary: Thanos Sidecar cannot access Prometheus, even though Prometheus seems | ||||
|             healthy and has reloaded WAL. | ||||
|         expr: | | ||||
|           thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0 | ||||
|           AND on (namespace, pod) | ||||
|           prometheus_tsdb_data_replay_duration_seconds != 0 | ||||
|         for: 5m | ||||
|         labels: | ||||
|           severity: critical | ||||
|       - alert: ThanosSidecarIsDown | ||||
|         annotations: | ||||
|           description: ThanosSidecar has disappeared. Prometheus target for the component | ||||
|             cannot be discovered. | ||||
|           runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown | ||||
|           summary: Thanos component has disappeared. | ||||
|         expr: | | ||||
|           absent(up{job=~".*thanos-sidecar.*"} == 1) | ||||
|         for: 5m | ||||
|         labels: | ||||
|           severity: critical | ||||
|  | ||||
| {{ end }} | ||||
		Reference in New Issue
	
	Block a user