# Listing metadata: 35 lines, 1.5 KiB, YAML
---
# PrometheusRule for Grafana, rendered by Helm.
#
# Defines two rule groups:
#   * GrafanaAlerts  - one alert on the share of HTTP requests answered with a
#                      5xx status code.
#   * grafana_rules  - the recording rule that the alert expression reads.
#
# Chart values read by this template:
#   nfc_monitoring.grafana.labels        - optional map of extra labels
#   nfc_monitoring.prometheus.namespace  - namespace the rule is created in
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # User-supplied labels are optional: the `with` guard renders nothing when
    # the value is unset, instead of emitting a stray `null` line in this map.
    {{- with $.Values.nfc_monitoring.grafana.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
    # Label values must be strings, so every templated value is quoted.
    app.kubernetes.io/part-of: {{ $.Chart.Name | quote }}
    app.kubernetes.io/version: {{ $.Chart.Version | quote }}
    app.kubernetes.io/managed-by: {{ $.Release.Service | quote }}
    # NOTE(review): presumably these two labels are what the Prometheus
    # resource's rule selector matches on - confirm against the Prometheus CR.
    prometheus: k8s
    role: alert-rules
  name: grafana-rules
  namespace: {{ $.Values.nfc_monitoring.prometheus.namespace | quote }}
spec:
  groups:
    - name: GrafanaAlerts
      rules:
        # Fires when, for 5 minutes, more than 50% of the requests to a
        # handler are answered with a 5xx status code. The datasource proxy
        # and query handlers are excluded from both numerator and denominator.
        - alert: GrafanaRequestsFailing
          annotations:
            # The backtick-quoted opening braces are a Helm escape: Helm emits
            # them literally so that the $labels / $value placeholders are
            # left in the rendered manifest rather than evaluated by Helm.
            message: '{{`{{`}} $labels.namespace }}/{{`{{`}} $labels.job }}/{{`{{`}} $labels.handler }} is experiencing {{`{{`}} $value | humanize }}% errors'
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing
          expr: |
            100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
            / ignoring (status_code)
            sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
            > 50
          for: 5m
          labels:
            severity: warning
    - name: grafana_rules
      rules:
        # Recording rule: 5-minute request rate summed per namespace, job,
        # handler and status_code. The GrafanaRequestsFailing alert above
        # queries the series recorded here.
        - expr: |
            sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
          record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m