Compare commits
76 Commits
0.2.0
...
development
Author | SHA1 | Date | |
---|---|---|---|
d25cee3c8c | |||
05bd0cd6d4 | |||
ac6b7ef31e | |||
6b44c1df69 | |||
d5deb022c5 | |||
21c5e87c85 | |||
6ac6ced4bb | |||
39f26c8f82 | |||
13e60d0a76 | |||
22d3308464 | |||
a83648e2ac | |||
b01c6bbb06 | |||
d6e21083c9 | |||
bfa20b6a09 | |||
d6cf67b930 | |||
5aea6f620e | |||
3a31680ab5 | |||
e22e98fd08 | |||
78ff4066ea | |||
3b33cf43d8 | |||
e5bc593611 | |||
e4a98648e2 | |||
8d1ad238e4 | |||
2bfa45d5a3 | |||
a33a0514d7 | |||
ea59c866d6 | |||
2bdde14a5a | |||
07be00f24c | |||
452087a111 | |||
df7917aef8 | |||
dbacb1794c | |||
eecb42a4e5 | |||
d53d28314e | |||
0a0d37e44d | |||
65e247958c | |||
fce97d5aa2 | |||
b2d3cad87d | |||
496c7637c3 | |||
99e503324d | |||
486f2c4728 | |||
a2c3daa44e | |||
faf4abf6b3 | |||
cd2b99dd3d | |||
efd6d15dc4 | |||
f08cba1dfb | |||
49bf414caa | |||
36ee3a10ff | |||
6a20b69910 | |||
1fd5e49247 | |||
5323377852 | |||
4e8f25ec3d | |||
cb12f338f1 | |||
39af78c6ea | |||
73f25cfaa2 | |||
beaa4f4896 | |||
e8b4b5a00b | |||
490e497d15 | |||
38f08985f5 | |||
c7746122cd | |||
57a1706590 | |||
bdb555a4b5 | |||
bdb3a09c2b | |||
9c35a4d140 | |||
bc4d72ff8e | |||
e13d55e61e | |||
257da9cd38 | |||
106f2e6ec8 | |||
f1c54567a7 | |||
cd2bceec3a | |||
18649086b5 | |||
e0cb8f57e2 | |||
c5bb46f48a | |||
103a529184 | |||
51a187bb75 | |||
f613ba29cd | |||
3eafca6c41 |
10
.nfc_automation.yaml
Normal file
10
.nfc_automation.yaml
Normal file
@ -0,0 +1,10 @@
|
||||
---
# Git automation settings; all keys below sit under `role_git_conf`.
# NOTE(review): the nesting was reconstructed from a diff view that had lost
# its indentation - confirm against the schema the consuming tooling expects.

role_git_conf:
  gitlab:
    # Branch names are kept as explicit strings.
    submodule_branch: 'development'
    default_branch: 'development'
    # GitLab quick-action label syntax; quoted so the leading `~` and the
    # embedded double quotes are always read as one literal string.
    mr_labels: '~"type::automation" ~"impact::0" ~"priority::0"'
    auto_merge: true
    merge_request:
      patch_labels: '~"code review::not started"'
|
Submodule gitlab-ci updated: a5a9fa4437...224ef83157
@ -1,9 +1,12 @@
|
||||
{{ if false }}
|
||||
# already on k3s
|
||||
---
|
||||
apiVersion: apiregistration.k8s.io/v1
|
||||
kind: APIService
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -17,3 +20,4 @@ spec:
|
||||
namespace: monitoring
|
||||
version: v1beta1
|
||||
versionPriority: 100
|
||||
{{ end }}
|
@ -4,10 +4,11 @@ kind: Alertmanager
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: main
|
||||
name: {{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }}
|
||||
spec:
|
||||
image: "{{ .Values.nfc_monitoring.alert_manager.image.name }}:{{ .Values.nfc_monitoring.alert_manager.image.tag }}"
|
||||
@ -16,12 +17,12 @@ spec:
|
||||
podMetadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
replicas: 3
|
||||
replicas: {{ .Values.nfc_monitoring.alert_manager.replicas }}
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
|
@ -15,6 +15,7 @@ metadata:
|
||||
question.
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -32,12 +33,13 @@ spec:
|
||||
synchronize: true
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: "{{ `{{` }}request.object.metadata.name }}"
|
||||
data:
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 14 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
||||
|
@ -15,6 +15,7 @@ metadata:
|
||||
question.
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -32,20 +33,21 @@ spec:
|
||||
synchronize: true
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: "{{ `{{` }}request.object.metadata.name }}"
|
||||
data:
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 14 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}"
|
||||
{{ end }}
|
||||
|
@ -5,6 +5,7 @@ metadata:
|
||||
name: grafana-agent
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,9 +1,11 @@
|
||||
{{ if false }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -22,3 +24,6 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
# Already exists on k3s
|
||||
{{ end }}
|
@ -4,6 +4,7 @@ kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -5,6 +5,7 @@ metadata:
|
||||
name: hpa-controller-custom-metrics
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -4,6 +4,7 @@ kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
{{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -5,6 +6,7 @@ kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -14,3 +16,4 @@ rules:
|
||||
resources: ["configmaps"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
@ -5,7 +6,7 @@ metadata:
|
||||
name: kube-monitor-proxy
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -19,3 +20,5 @@ rules:
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs: ["create"]
|
||||
|
||||
{{ end }}
|
@ -1,10 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -18,3 +19,5 @@ subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
|
||||
{{ end }}
|
@ -1,10 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -130,3 +131,5 @@ rules:
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
||||
{{ end }}
|
||||
|
@ -4,6 +4,7 @@ kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -4,6 +4,7 @@ kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -4,10 +4,11 @@ kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
|
@ -4,6 +4,7 @@ kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
{{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}}
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
@ -5,6 +6,7 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -18,3 +20,4 @@ subjects:
|
||||
name: grafana
|
||||
namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -5,7 +5,7 @@ metadata:
|
||||
name: kube-monitor-proxy
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
@ -4,15 +4,16 @@ kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
@ -6,6 +7,7 @@ metadata:
|
||||
namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}"
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -123,4 +125,4 @@ data:
|
||||
}
|
||||
}
|
||||
|
||||
---
|
||||
{{- end }}
|
||||
|
@ -4,6 +4,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -15,7 +16,7 @@ data:
|
||||
wal_directory: /tmp/wal
|
||||
|
||||
|
||||
|
||||
{{ if .Values.nfc_monitoring.loki.config }}
|
||||
logs:
|
||||
positions_directory: "/tmp"
|
||||
|
||||
@ -234,7 +235,7 @@ data:
|
||||
- target_label: node
|
||||
source_labels:
|
||||
- __meta_kubernetes_pod_node_name
|
||||
|
||||
{{ end }}
|
||||
|
||||
integrations:
|
||||
|
||||
@ -262,6 +263,8 @@ data:
|
||||
|
||||
syslog_server.yaml: |
|
||||
# REF: https://grafana.com/docs/loki/latest/send-data/promtail/configuration/#example-syslog-config
|
||||
|
||||
{{ if .Values.nfc_monitoring.loki.config }}
|
||||
server:
|
||||
http_listen_port: 9080
|
||||
grpc_listen_port: 0
|
||||
@ -281,3 +284,5 @@ data:
|
||||
relabel_configs:
|
||||
- source_labels: ['__syslog_message_hostname']
|
||||
target_label: 'host'
|
||||
{{ end }}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
{{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}}
|
||||
---
|
||||
# Provisioning config
|
||||
@ -6,6 +7,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -27,3 +29,4 @@ data:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: true
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -4,6 +4,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -4,6 +4,7 @@ kind: DaemonSet
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -16,6 +17,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -26,6 +28,7 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 8 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,10 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -19,7 +20,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -28,7 +29,7 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -134,3 +135,5 @@ spec:
|
||||
serviceAccountName: kube-monitor-proxy
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,10 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -16,7 +17,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -26,7 +27,7 @@ spec:
|
||||
kubectl.kubernetes.io/default-container: kube-state-metrics
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -110,3 +111,5 @@ spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
serviceAccountName: kube-state-metrics
|
||||
|
||||
{{ end }}
|
||||
|
@ -4,6 +4,7 @@ kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -14,6 +15,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -25,6 +27,7 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 8 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,9 +1,11 @@
|
||||
---
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
apiVersion: grafana.integreatly.org/v1beta1
|
||||
kind: Grafana
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -16,6 +18,7 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 8 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -24,6 +27,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 10 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
strategy:
|
||||
@ -34,6 +38,7 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 12 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -91,7 +96,7 @@ spec:
|
||||
name: dashboards
|
||||
|
||||
- image: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.image.name }}:{{ .Values.nfc_monitoring.additions.dashboard_sidecar.image.tag}}"
|
||||
name: k8s-sidecar
|
||||
name: sidecar
|
||||
env:
|
||||
- name: LABEL
|
||||
value: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.label_name }}"
|
||||
@ -147,3 +152,5 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: "5Gi"
|
||||
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
apiVersion: grafana.integreatly.org/v1beta1
|
||||
kind: GrafanaDashboard
|
||||
@ -11,8 +12,10 @@ spec:
|
||||
instanceSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: grafana
|
||||
grafanaCom:
|
||||
id: 9578
|
||||
revision: 4 # as @ 19-09-23
|
||||
revision: 4 # as @ 19-09-23
|
||||
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
{{- if .Values.nfc_monitoring.additions.ceph.enabled | default false -}}
|
||||
apiVersion: grafana.integreatly.org/v1beta1
|
||||
@ -12,10 +13,11 @@ spec:
|
||||
instanceSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: grafana
|
||||
grafanaCom:
|
||||
id: 2842
|
||||
revision: 17 # as @ 19-09-23
|
||||
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
{{- end }}
|
25
templates/GrafanaDashboard-calico.yaml
Normal file
25
templates/GrafanaDashboard-calico.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
---
# GrafanaDashboard for Calico Felix.
# Rendered only when all three hold: Grafana is enabled, the cluster
# networking value is "calico", and the Calico service monitor is switched on.
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{- if eq .Values.nfc_monitoring.kubernetes.networking "calico" }}
{{- if .Values.nfc_monitoring.prometheus.service_monitor.calico }}
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: calico-felix
  # Quoted like the other Grafana templates so an empty or number-like
  # namespace value still renders as a YAML string.
  namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}"
spec:
  allowCrossNamespaceImport: true
  folder: No Fuss Monitoring
  resyncPeriod: 1d
  # Targets the Grafana instance carrying this release's labels.
  instanceSelector:
    matchLabels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/instance: {{ $.Release.Name }}
      app.kubernetes.io/name: grafana
  # Dashboard is fetched from grafana.com by id and pinned revision.
  grafanaCom:
    id: 12175
    revision: 5  # as @ 2020-05-04T16:47:08

{{- end }}
{{ end }}
{{ end }}
|
21
templates/GrafanaDashboard-cert-manager.yaml
Normal file
21
templates/GrafanaDashboard-cert-manager.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
---
# GrafanaDashboard for cert-manager.
# Rendered only when Grafana is enabled (same gate as the other dashboard
# templates) and the cert_manager dashboard toggle is truthy.
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{- if .Values.nfc_monitoring.grafana.dashboards.cert_manager | default false }}
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  # GrafanaDashboard names must be unique within the namespace, so this
  # resource is named after its own subject rather than `node-exporter`.
  name: cert-manager
  # Quoted like the other Grafana templates so an empty or number-like
  # namespace value still renders as a YAML string.
  namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}"
spec:
  allowCrossNamespaceImport: true
  folder: No Fuss Monitoring
  resyncPeriod: 1d
  # Targets the Grafana instance carrying this release's labels.
  instanceSelector:
    matchLabels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/instance: {{ $.Release.Name }}
      app.kubernetes.io/name: grafana
  # Dashboard is fetched from grafana.com by id and pinned revision.
  grafanaCom:
    id: 11001
    revision: 1  # as @ 2019-10-16T13:48:56

{{- end }}
{{ end }}
|
@ -1,4 +1,5 @@
|
||||
---
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
apiVersion: grafana.integreatly.org/v1beta1
|
||||
kind: GrafanaDashboard
|
||||
metadata:
|
||||
@ -11,9 +12,10 @@ spec:
|
||||
instanceSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: grafana
|
||||
{{ $Dashboard := .Files.Get "files/dashboard-summary.json" | fromJson }}
|
||||
json: >-
|
||||
{{ $Dashboard | toRawJson }}
|
||||
|
||||
{{- end }}
|
||||
|
@ -1,4 +1,5 @@
|
||||
---
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
apiVersion: grafana.integreatly.org/v1beta1
|
||||
kind: GrafanaDashboard
|
||||
metadata:
|
||||
@ -11,8 +12,10 @@ spec:
|
||||
instanceSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: grafana
|
||||
grafanaCom:
|
||||
id: 1860
|
||||
revision: 32 # as @ 19-09-23
|
||||
revision: 32 # as @ 19-09-23
|
||||
|
||||
{{- end }}
|
||||
|
@ -1,4 +1,5 @@
|
||||
---
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
apiVersion: grafana.integreatly.org/v1beta1
|
||||
kind: GrafanaDatasourceList
|
||||
items:
|
||||
@ -10,7 +11,7 @@ items:
|
||||
namespace: "{{ $.Values.nfc_monitoring.grafana.namespace }}"
|
||||
labels:
|
||||
app.kubernetes.io/component: dashboard
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -24,3 +25,4 @@ items:
|
||||
{{ toYaml . | nindent 8 }}
|
||||
|
||||
{{ end }}
|
||||
{{- end }}
|
||||
|
@ -1,4 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.alert_manager.ingress.enabled | default "false" -}}
|
||||
{{ if .Values.nfc_monitoring.alert_manager.ingress.enabled -}}
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
@ -8,11 +8,26 @@ metadata:
|
||||
annotations:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.ingress.annotations | nindent 4 }}
|
||||
spec:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.ingress.spec | nindent 2 }}
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .Values.nfc_monitoring.alert_manager.ingress.hostname }}
|
||||
secretName: certificate-tls-alert-manager
|
||||
rules:
|
||||
- host: {{ .Values.nfc_monitoring.alert_manager.ingress.hostname }}
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: alertmanager-{{ $.Release.Name }}
|
||||
port:
|
||||
name: web
|
||||
|
||||
{{ end }}
|
||||
|
||||
{{ if .Values.nfc_monitoring.grafana.ingress.enabled | default "false" -}}
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
{{ if .Values.nfc_monitoring.grafana.ingress.enabled -}}
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
@ -22,9 +37,24 @@ metadata:
|
||||
annotations:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.ingress.annotations | nindent 4 }}
|
||||
spec:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.ingress.spec | nindent 2 }}
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .Values.nfc_monitoring.grafana.ingress.hostname }}
|
||||
secretName: certificate-tls-grafana
|
||||
rules:
|
||||
- host: {{ .Values.nfc_monitoring.grafana.ingress.hostname }}
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: grafana
|
||||
port:
|
||||
name: grafana-http
|
||||
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
||||
# Prometheus Ingress: rendered only when ingress.enabled is truthy. No string
# default is piped in, because a non-empty string such as "false" is truthy.
{{ if .Values.nfc_monitoring.prometheus.ingress.enabled -}}
|
||||
---
|
||||
@ -36,6 +66,20 @@ metadata:
|
||||
annotations:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.ingress.annotations | nindent 4 }}
|
||||
spec:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.ingress.spec | nindent 2 }}
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .Values.nfc_monitoring.prometheus.ingress.hostname }}
|
||||
secretName: certificate-tls-prometheus
|
||||
rules:
|
||||
- host: {{ .Values.nfc_monitoring.prometheus.ingress.hostname }}
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
port:
|
||||
name: web
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,31 +1,37 @@
|
||||
# apiVersion: networking.k8s.io/v1
|
||||
# kind: NetworkPolicy
|
||||
# metadata:
|
||||
# labels:
|
||||
# app.kubernetes.io/component: exporter
|
||||
# app.kubernetes.io/name: kube-state-metrics
|
||||
# app.kubernetes.io/part-of: kube-prometheus
|
||||
# app.kubernetes.io/version: 2.8.1
|
||||
# name: kube-state-metrics
|
||||
# namespace: monitoring
|
||||
# spec:
|
||||
# egress:
|
||||
# - {}
|
||||
# ingress:
|
||||
# - from:
|
||||
# - podSelector:
|
||||
# matchLabels:
|
||||
# app.kubernetes.io/name: prometheus
|
||||
# ports:
|
||||
# - port: 8443
|
||||
# protocol: TCP
|
||||
# - port: 9443
|
||||
# protocol: TCP
|
||||
# podSelector:
|
||||
# matchLabels:
|
||||
# app.kubernetes.io/component: exporter
|
||||
# app.kubernetes.io/name: kube-state-metrics
|
||||
# app.kubernetes.io/part-of: kube-prometheus
|
||||
# policyTypes:
|
||||
# - Egress
|
||||
# - Ingress
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.8.1
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
- port: 9443
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,21 +1,313 @@
|
||||
|
||||
{{ range .Values.nfc_monitoring.network_policy.policies }}
|
||||
{{- if .Values.nfc_monitoring.network_policy.enabled -}}
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: {{ .name | replace "_" "-" }}
|
||||
namespace: {{ (get $.Values.nfc_monitoring .name ).namespace }}
|
||||
spec:
|
||||
{{ toYaml .policy | nindent 2 }}
|
||||
- name: prometheus
|
||||
policy:
|
||||
egress: # ToDo: add further restrictions to egress. Is a variable lookup possible to obtain the values?
|
||||
# - {}
|
||||
- to: # Alert Manager
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: alerting
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/name: alertmanager
|
||||
ports:
|
||||
- port: 9093
|
||||
protocol: TCP
|
||||
|
||||
- to: # Ceph
|
||||
- ipBlock:
|
||||
cidr: 172.16.10.0/24
|
||||
ports:
|
||||
- port: 9283
|
||||
protocol: TCP
|
||||
|
||||
- to: # Grafana
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: grafana
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: grafana
|
||||
ports:
|
||||
- port: 3000
|
||||
protocol: TCP
|
||||
|
||||
- to: # Grafana Agent
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: grafana-agent
|
||||
ports:
|
||||
- port: 12345
|
||||
protocol: TCP
|
||||
|
||||
- to: # Kube DNS
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
|
||||
- to:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
# namespaceSelector:
|
||||
# matchLabels:
|
||||
# kubernetes.io/metadata.name: monitoring
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: metrics
|
||||
ports: []
|
||||
|
||||
- {} # ToDo: Temp rule: Allow All. This rule MUST be removed when egress has been refactored.
|
||||
|
||||
ingress:
|
||||
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
# namespaceSelector:
|
||||
# matchLabels:
|
||||
# kubernetes.io/metadata.name: monitoring
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: metrics
|
||||
ports: []
|
||||
# - port: 8080
|
||||
# protocol: TCP
|
||||
# - port: 9090
|
||||
# protocol: TCP
|
||||
# - port: 10901
|
||||
# protocol: TCP
|
||||
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: grafana
|
||||
namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: grafana
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
|
||||
- from: []
|
||||
ports: []
|
||||
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
podSelector:
|
||||
matchLabels:
|
||||
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 6 }}
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: {{ .name | replace "_" "-" }}
|
||||
namespace: {{ (get $.Values.nfc_monitoring .name ).namespace }}
|
||||
spec:
|
||||
- name: grafana
|
||||
policy:
|
||||
egress:
|
||||
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: alerting
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/name: alertmanager
|
||||
ports:
|
||||
- port: 9093
|
||||
protocol: TCP
|
||||
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: logging
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: gateway
|
||||
app.kubernetes.io/instance: loki
|
||||
app.kubernetes.io/name: loki
|
||||
ports:
|
||||
- port: 80 # Service Port
|
||||
protocol: TCP
|
||||
- port: 8080 # Pod Port
|
||||
protocol: TCP
|
||||
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: prometheus
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: metrics
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: query-layer
|
||||
app.kubernetes.io/instance: thanos-query
|
||||
app.kubernetes.io/name: thanos-query
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
|
||||
- to: [] # Requires internet access for plugins and dashboard downloading
|
||||
ports:
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
|
||||
- to: # Kube DNS
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
|
||||
ingress:
|
||||
|
||||
- from: []
|
||||
ports:
|
||||
- port: 3000
|
||||
protocol: TCP
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
podSelector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 8 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: {{ .name | replace "_" "-" }}
|
||||
namespace: {{ (get $.Values.nfc_monitoring .name ).namespace }}
|
||||
spec:
|
||||
- name: grafana_agent
|
||||
policy:
|
||||
egress:
|
||||
|
||||
- to: # Logging
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: logging
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: gateway
|
||||
app.kubernetes.io/instance: loki
|
||||
app.kubernetes.io/name: loki
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
|
||||
- to: # Kube DNS
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
|
||||
ingress:
|
||||
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 12345
|
||||
protocol: TCP
|
||||
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
podSelector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 8 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -4,6 +4,7 @@ kind: PodDisruptionBudget
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -14,4 +15,5 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
@ -4,6 +4,7 @@ kind: PodDisruptionBudget
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -14,4 +15,5 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
@ -7,11 +7,12 @@ metadata:
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
spec:
|
||||
minAvailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
@ -4,10 +4,11 @@ kind: Prometheus
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: k8s
|
||||
name: {{ $.Release.Name }}
|
||||
namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}"
|
||||
spec:
|
||||
affinity:
|
||||
@ -15,17 +16,18 @@ spec:
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- apiVersion: v2
|
||||
name: alertmanager-main
|
||||
name: alertmanager-{{ $.Release.Name }}
|
||||
namespace: "{{ .Values.nfc_monitoring.alert_manager.namespace }}"
|
||||
port: web
|
||||
enableFeatures: []
|
||||
externalLabels: {}
|
||||
image: quay.io/prometheus/prometheus:v2.42.0
|
||||
image: {{ .Values.nfc_monitoring.prometheus.image.name }}:{{ .Values.nfc_monitoring.prometheus.image.tag}}
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
podMetadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -33,7 +35,11 @@ spec:
|
||||
podMonitorSelector: {}
|
||||
probeNamespaceSelector: {}
|
||||
probeSelector: {}
|
||||
replicas: 3
|
||||
{{ if .Values.nfc_monitoring.prometheus.remotewrite }}
|
||||
remoteWrite: {{ .Values.nfc_monitoring.prometheus.remotewrite | toYaml | nindent 4 }}
|
||||
|
||||
{{- end }}
|
||||
replicas: {{ $.Values.nfc_monitoring.prometheus.replicas }}
|
||||
resources:
|
||||
requests:
|
||||
memory: 400Mi
|
||||
@ -42,18 +48,20 @@ spec:
|
||||
fsGroup: 2000
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
serviceAccountName: prometheus-k8s
|
||||
serviceAccountName: prometheus-{{ $.Release.Name }}
|
||||
serviceMonitorNamespaceSelector: {}
|
||||
serviceMonitorSelector: {}
|
||||
storage:
|
||||
{{- toYaml .Values.nfc_monitoring.prometheus.storage | nindent 4 }}
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
|
||||
thanos:
|
||||
image: "{{ .Values.nfc_monitoring.thanos.image.name }}:{{ .Values.nfc_monitoring.thanos.image.tag }}"
|
||||
objectStorageConfig:
|
||||
key: thanos.yaml
|
||||
name: thanos-sidecar-config
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
version: 2.42.0
|
||||
{{ if .Values.nfc_monitoring.prometheus.additional }}
|
||||
{{ toYaml .Values.nfc_monitoring.prometheus.additional | nindent 2 }}
|
||||
|
@ -4,10 +4,11 @@ kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: alertmanager-main-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -23,7 +24,7 @@ spec:
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
|
||||
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
@ -35,9 +36,9 @@ spec:
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
max_over_time(alertmanager_cluster_members{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m])
|
||||
< on (namespace,service) group_left
|
||||
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
|
||||
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m]))
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
@ -48,9 +49,9 @@ spec:
|
||||
summary: An Alertmanager instance failed to send notifications.
|
||||
expr: |
|
||||
(
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
rate(alertmanager_notifications_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
@ -63,9 +64,9 @@ spec:
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
|
||||
expr: |
|
||||
min by (namespace,service, integration) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration=~`.*`}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
|
||||
rate(alertmanager_notifications_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration=~`.*`}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
@ -78,9 +79,9 @@ spec:
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
|
||||
expr: |
|
||||
min by (namespace,service, integration) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration!~`.*`}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
|
||||
rate(alertmanager_notifications_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration!~`.*`}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
@ -93,7 +94,7 @@ spec:
|
||||
summary: Alertmanager instances within the same cluster have different configurations.
|
||||
expr: |
|
||||
count by (namespace,service) (
|
||||
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
|
||||
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"})
|
||||
)
|
||||
!= 1
|
||||
for: 20m
|
||||
@ -107,11 +108,11 @@ spec:
|
||||
expr: |
|
||||
(
|
||||
count by (namespace,service) (
|
||||
avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
|
||||
avg_over_time(up{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m]) < 0.5
|
||||
)
|
||||
/
|
||||
count by (namespace,service) (
|
||||
up{job="alertmanager-main",namespace="monitoring"}
|
||||
up{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}
|
||||
)
|
||||
)
|
||||
>= 0.5
|
||||
@ -126,11 +127,11 @@ spec:
|
||||
expr: |
|
||||
(
|
||||
count by (namespace,service) (
|
||||
changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
|
||||
changes(process_start_time_seconds{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[10m]) > 4
|
||||
)
|
||||
/
|
||||
count by (namespace,service) (
|
||||
up{job="alertmanager-main",namespace="monitoring"}
|
||||
up{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}
|
||||
)
|
||||
)
|
||||
>= 0.5
|
||||
|
48
templates/PrometheusRule-common.yaml
Normal file
48
templates/PrometheusRule-common.yaml
Normal file
@ -0,0 +1,48 @@
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-prometheus
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: common
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
spec:
|
||||
groups:
|
||||
- name: common.rules
|
||||
rules:
|
||||
|
||||
- alert: Watchdog
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||
and always fire against a receiver. There are integrations with various notification
|
||||
mechanisms that send a notification when this alert is not firing. For example the
|
||||
"DeadMansSnitch" integration in PagerDuty.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
|
||||
summary: An alert that should always be firing to certify that Alertmanager is working properly.
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: none
|
||||
|
||||
- alert: InfoInhibitor
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert that is used to inhibit info alerts.
|
||||
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
|
||||
other alerts.
|
||||
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
|
||||
severity of 'warning' or 'critical' starts firing on the same namespace.
|
||||
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
|
||||
summary: Info-level alert inhibition.
|
||||
expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
|
||||
labels:
|
||||
severity: none
|
@ -4,10 +4,11 @@ kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: grafana-agent
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
|
@ -1,13 +1,15 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: grafana-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -32,3 +34,5 @@ spec:
|
||||
- expr: |
|
||||
sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
|
||||
record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m
|
||||
|
||||
{{- end }}
|
||||
|
@ -1,14 +1,16 @@
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-prometheus
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: kube-prometheus-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -25,33 +27,6 @@ spec:
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: Watchdog
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||
and always fire against a receiver. There are integrations with various notification
|
||||
mechanisms that send a notification when this alert is not firing. For example the
|
||||
"DeadMansSnitch" integration in PagerDuty.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
|
||||
summary: An alert that should always be firing to certify that Alertmanager is working properly.
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: none
|
||||
- alert: InfoInhibitor
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert that is used to inhibit info alerts.
|
||||
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
|
||||
other alerts.
|
||||
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
|
||||
severity of 'warning' or 'critical' starts firing on the same namespace.
|
||||
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
|
||||
summary: Info-level alert inhibition.
|
||||
expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
|
||||
labels:
|
||||
severity: none
|
||||
- name: node-network
|
||||
rules:
|
||||
- alert: NodeNetworkInterfaceFlapping
|
||||
@ -84,3 +59,5 @@ spec:
|
||||
record: count:up1
|
||||
- expr: count without(instance, pod, node) (up == 0)
|
||||
record: count:up0
|
||||
|
||||
{{ end }}
|
@ -1,14 +1,16 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: kube-state-metrics-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -65,3 +67,5 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,13 +1,15 @@
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-prometheus
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: kubernetes-monitoring-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -1439,3 +1441,5 @@ spec:
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
|
||||
{{ end }}
|
@ -1,15 +1,17 @@
|
||||
---
|
||||
{{- if .Values.nfc_monitoring.loki.enabled | default false -}}
|
||||
{{ if .Values.nfc_monitoring.loki.config }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: logging
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: loki
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: loki
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -109,4 +111,5 @@ spec:
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
{{ end }}
|
||||
{{- end -}}
|
||||
|
@ -4,11 +4,12 @@ kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: node-exporter-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -16,6 +17,16 @@ spec:
|
||||
groups:
|
||||
- name: node-exporter
|
||||
rules:
|
||||
{{ range $index, $node := (lookup "v1" "Node" "" "").items }}
|
||||
- alert: NodeExporterJobMissing-{{ $node.metadata.name }}
|
||||
annotations:
|
||||
summary: Node Exporter job missing for node {{ $node.metadata.name }}. (instance {{ `{{` }} $labels.instance }})
|
||||
description: "Node Exporter job has disappeared\n Node = {{ $node.metadata.name }}\n Value = {{ `{{` }} $value }}\n LABELS = {{ `{{` }} $labels }}"
|
||||
expr: absent(up{job="node-exporter", node="{{ $node.metadata.name }}"})
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
{{ end }}
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available space left and is filling up.
|
||||
|
@ -4,12 +4,13 @@ kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: prometheus-k8s-prometheus-rules
|
||||
name: prometheus-{{ $.Release.Name }}-prometheus-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
spec:
|
||||
groups:
|
||||
@ -23,7 +24,7 @@ spec:
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
|
||||
max_over_time(prometheus_config_last_reload_successful{job="prometheus",namespace="monitoring"}[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
@ -36,9 +37,9 @@ spec:
|
||||
# Without min_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
|
||||
predict_linear(prometheus_notifications_queue_length{job="prometheus",namespace="monitoring"}[5m], 60 * 30)
|
||||
>
|
||||
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
min_over_time(prometheus_notifications_queue_capacity{job="prometheus",namespace="monitoring"}[5m])
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
@ -50,9 +51,9 @@ spec:
|
||||
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
|
||||
expr: |
|
||||
(
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
rate(prometheus_notifications_errors_total{job="prometheus",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
rate(prometheus_notifications_sent_total{job="prometheus",namespace="monitoring"}[5m])
|
||||
)
|
||||
* 100
|
||||
> 1
|
||||
@ -67,7 +68,7 @@ spec:
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
|
||||
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus",namespace="monitoring"}[5m]) < 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -77,7 +78,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
|
||||
summary: Prometheus has issues reloading blocks from disk.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
increase(prometheus_tsdb_reloads_failures_total{job="prometheus",namespace="monitoring"}[3h]) > 0
|
||||
for: 4h
|
||||
labels:
|
||||
severity: warning
|
||||
@ -87,7 +88,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
|
||||
summary: Prometheus has issues compacting blocks.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
increase(prometheus_tsdb_compactions_failed_total{job="prometheus",namespace="monitoring"}[3h]) > 0
|
||||
for: 4h
|
||||
labels:
|
||||
severity: warning
|
||||
@ -98,12 +99,12 @@ spec:
|
||||
summary: Prometheus is not ingesting samples.
|
||||
expr: |
|
||||
(
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus",namespace="monitoring"}[5m]) <= 0
|
||||
and
|
||||
(
|
||||
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
|
||||
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus",namespace="monitoring"}) > 0
|
||||
or
|
||||
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
|
||||
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus",namespace="monitoring"}) > 0
|
||||
)
|
||||
)
|
||||
for: 10m
|
||||
@ -115,7 +116,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
|
||||
summary: Prometheus is dropping samples with duplicate timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -125,7 +126,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
|
||||
summary: Prometheus drops samples with out-of-order timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -136,12 +137,12 @@ spec:
|
||||
summary: Prometheus fails to send samples to remote storage.
|
||||
expr: |
|
||||
(
|
||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
|
||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus",namespace="monitoring"}[5m]))
|
||||
/
|
||||
(
|
||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
|
||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus",namespace="monitoring"}[5m]))
|
||||
+
|
||||
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
|
||||
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus",namespace="monitoring"}[5m]))
|
||||
)
|
||||
)
|
||||
* 100
|
||||
@ -158,9 +159,9 @@ spec:
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus",namespace="monitoring"}[5m])
|
||||
- ignoring(remote_name, url) group_right
|
||||
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 120
|
||||
for: 15m
|
||||
@ -168,16 +169,16 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteDesiredShards
|
||||
annotations:
|
||||
description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} remote write desired shards calculation wants to run {{ `{{` }} $value }} shards for queue {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }}, which is more than the max of {{ `{{` }} printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
|
||||
description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} remote write desired shards calculation wants to run {{ `{{` }} $value }} shards for queue {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }}, which is more than the max of {{ `{{` }} printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus",namespace="monitoring"}` $labels.instance | query | first | value }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
|
||||
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus",namespace="monitoring"}[5m])
|
||||
>
|
||||
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
max_over_time(prometheus_remote_storage_shards_max{job="prometheus",namespace="monitoring"}[5m])
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
@ -188,7 +189,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
|
||||
summary: Prometheus is failing rule evaluations.
|
||||
expr: |
|
||||
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
increase(prometheus_rule_evaluation_failures_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
@ -198,7 +199,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
|
||||
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
|
||||
expr: |
|
||||
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
increase(prometheus_rule_group_iterations_missed_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -208,7 +209,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
|
||||
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -218,7 +219,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
|
||||
summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -228,7 +229,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
|
||||
summary: Prometheus has dropped some targets that exceeded body size limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -238,7 +239,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
|
||||
summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -248,7 +249,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
|
||||
summary: Prometheus has failed to sync targets.
|
||||
expr: |
|
||||
increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0
|
||||
increase(prometheus_target_sync_failed_total{job="prometheus",namespace="monitoring"}[30m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
@ -258,7 +259,7 @@ spec:
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload
|
||||
summary: Prometheus is reaching its maximum capacity serving concurrent requests.
|
||||
expr: |
|
||||
avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8
|
||||
avg_over_time(prometheus_engine_queries{job="prometheus",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus",namespace="monitoring"}[5m]) > 0.8
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -269,9 +270,9 @@ spec:
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
min without (alertmanager) (
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
|
||||
rate(prometheus_notifications_errors_total{job="prometheus",namespace="monitoring",alertmanager!~``}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
|
||||
rate(prometheus_notifications_sent_total{job="prometheus",namespace="monitoring",alertmanager!~``}[5m])
|
||||
)
|
||||
* 100
|
||||
> 3
|
||||
|
@ -1,15 +1,17 @@
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: thanos
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: thanos-sidecar-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
@ -52,4 +54,5 @@ spec:
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
@ -2,17 +2,18 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
items:
|
||||
|
||||
{{ range .Values.nfc_monitoring.prometheus.monitor_namespaces }}
|
||||
{{ range $index, $namespace := (lookup "v1" "Namespace" "" "").items }}
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
namespace: {{ . | quote }}
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ $namespace.metadata.name | quote }}
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
|
@ -4,6 +4,7 @@ kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -12,8 +13,8 @@ metadata:
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s-config
|
||||
name: prometheus-{{ $.Release.Name }}-config
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
|
@ -1,24 +1,25 @@
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
items:
|
||||
{{ range .Values.nfc_monitoring.prometheus.monitor_namespaces }}
|
||||
{{ range $index, $namespace := (lookup "v1" "Namespace" "" "").items }}
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
namespace: {{ . | quote }}
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ $namespace.metadata.name | quote }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ $.Values.nfc_monitoring.prometheus.namespace }}
|
||||
{{ end }}
|
||||
|
||||
|
@ -4,6 +4,7 @@ kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/version: 0.11.1
|
||||
name: resource-metrics-auth-reader
|
||||
namespace: kube-system
|
||||
|
@ -4,10 +4,11 @@ kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s-config
|
||||
name: prometheus-{{ $.Release.Name }}-config
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
rules:
|
||||
- apiGroups:
|
||||
|
@ -4,6 +4,7 @@ kind: Secret
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,10 +1,12 @@
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -16,3 +18,4 @@ stringData:
|
||||
{{ toYaml .Values.nfc_monitoring.thanos.sidecar.config | nindent 4 }}
|
||||
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
@ -5,7 +5,7 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: calico-metrics
|
||||
namespace: kube-system
|
||||
namespace: calico-system
|
||||
labels:
|
||||
k8s-app: calico-node
|
||||
spec:
|
||||
|
@ -1,5 +1,5 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
@ -7,12 +7,14 @@ metadata:
|
||||
namespace: {{ .Values.nfc_monitoring.grafana.namespace }}
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
spec:
|
||||
selector:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
#type: NodePort
|
||||
#type: LoadBalancer
|
||||
@ -24,3 +26,5 @@ spec:
|
||||
#nodePort: 3000
|
||||
#type: LoadBalancer
|
||||
sessionAffinity: ClientIP
|
||||
|
||||
{{- end }}
|
||||
|
@ -6,12 +6,14 @@ metadata:
|
||||
namespace: monitoring
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
spec:
|
||||
selector:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
ports:
|
||||
- name: grafana-metrics
|
||||
|
@ -4,10 +4,11 @@ kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: alertmanager-main
|
||||
name: alertmanager-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }}
|
||||
spec:
|
||||
ports:
|
||||
@ -19,7 +20,7 @@ spec:
|
||||
targetPort: reloader-web
|
||||
selector:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
sessionAffinity: ClientIP
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
@ -6,7 +7,7 @@ metadata:
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -16,7 +17,7 @@ metadata:
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
@ -28,3 +29,5 @@ spec:
|
||||
port: 10259
|
||||
targetPort: kube-scheduler
|
||||
sessionAffinity: ClientIP
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,10 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -22,7 +23,8 @@ spec:
|
||||
targetPort: https-self
|
||||
selector:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -4,6 +4,7 @@ kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -16,4 +17,5 @@ spec:
|
||||
targetPort: 6443
|
||||
selector:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
@ -4,10 +4,11 @@ kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
spec:
|
||||
ports:
|
||||
@ -19,6 +20,7 @@ spec:
|
||||
targetPort: reloader-web
|
||||
selector:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
sessionAffinity: ClientIP
|
||||
|
||||
@ -30,7 +32,7 @@ kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus-sidecar
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: thanos-sidecar
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
@ -48,5 +50,6 @@ spec:
|
||||
targetPort: 10902
|
||||
selector:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
@ -7,6 +8,9 @@ metadata:
|
||||
namespace: {{ .Values.nfc_monitoring.grafana.namespace }}
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
||||
{{- end }}
|
||||
|
@ -7,6 +7,7 @@ metadata:
|
||||
namespace: monitoring
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -5,6 +5,7 @@ kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
@ -6,8 +7,10 @@ metadata:
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
||||
{{ end }}
|
@ -1,3 +1,4 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
@ -5,10 +6,12 @@ kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
|
||||
{{ end }}
|
||||
|
@ -5,6 +5,7 @@ kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
|
@ -5,8 +5,9 @@ kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
|
@ -1,8 +1,10 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.apiserver }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: apiserver
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -76,3 +78,5 @@ spec:
|
||||
matchLabels:
|
||||
component: apiserver
|
||||
provider: kubernetes
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,8 +1,10 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.cadvisor }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: cadvisor
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -50,3 +52,5 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kubelet
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,10 +1,12 @@
|
||||
---
|
||||
{{- if eq .Values.nfc_monitoring.kubernetes.networking "calico" -}}
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.calico }}
|
||||
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: calico
|
||||
app.kubernetes.io/component: networking
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -12,7 +14,7 @@ metadata:
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
k8s-app: calico-node
|
||||
name: calico
|
||||
namespace: kube-system
|
||||
namespace: calico-system
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
@ -34,3 +36,4 @@ spec:
|
||||
k8s-app: calico-node
|
||||
|
||||
{{- end -}}
|
||||
{{ end }}
|
||||
|
@ -1,8 +1,10 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.coredns }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: coredns
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -26,3 +28,5 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
|
||||
{{ end }}
|
@ -1,9 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.grafana.enabled -}}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -31,3 +33,7 @@ spec:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 6 }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
|
||||
{{- end }}
|
||||
|
||||
|
@ -1,8 +1,10 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.kubelet }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kubelet
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -85,3 +87,6 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kubelet
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
|
||||
{{ end }}
|
@ -1,9 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.grafana_agent.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: node
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -74,4 +76,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -7,6 +7,7 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: ceph
|
||||
app.kubernetes.io/component: storage
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
|
@ -1,10 +1,12 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.kube_controller_manager }}
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-controller-manager
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -77,6 +79,9 @@ spec:
|
||||
insecureSkipVerify: true
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
@ -1,10 +1,12 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.service_monitor.kube_scheduler }}
|
||||
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-scheduler
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -31,6 +33,9 @@ spec:
|
||||
insecureSkipVerify: true
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-monitor-proxy
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
@ -1,10 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
@ -41,6 +42,8 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,9 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.grafana_agent.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -39,4 +41,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,9 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus_adaptor.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -32,4 +34,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
@ -1,13 +1,15 @@
|
||||
{{ if .Values.nfc_monitoring.prometheus.enabled }}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
name: prometheus-k8s
|
||||
name: prometheus-{{ $.Release.Name }}
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
spec:
|
||||
endpoints:
|
||||
@ -20,21 +22,26 @@ spec:
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_name
|
||||
targetLabel: instance
|
||||
- targetLabel: "job"
|
||||
replacement: "prometheus"
|
||||
- interval: 30s
|
||||
port: reloader-web
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
|
||||
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
|
||||
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus-sidecar
|
||||
app.kubernetes.io/instance: thanos-sidecar
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: thanos-sidecar
|
||||
app.kubernetes.io/version: v0.30.2
|
||||
name: thanos-sidecar
|
||||
@ -52,8 +59,10 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus-sidecar
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: thanos-sidecar
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
@ -6,11 +6,12 @@ kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: storage
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/name: ceph
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
prometheus: k8s
|
||||
prometheus: {{ $.Release.Name }}
|
||||
role: alert-rules
|
||||
name: ceph-rules
|
||||
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
|
||||
|
@ -1,9 +1,11 @@
|
||||
{{ if .Values.nfc_monitoring.alert_manager.enabled -}}
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
app.kubernetes.io/managed-by: {{ $.Release.Service }}
|
||||
app.kubernetes.io/version: {{ $.Chart.Version }}
|
||||
@ -14,12 +16,30 @@ spec:
|
||||
endpoints:
|
||||
- interval: 30s
|
||||
port: web
|
||||
relabelings:
|
||||
- action: replace
|
||||
regex: (.*)
|
||||
replacement: $1
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_name
|
||||
targetLabel: instance
|
||||
- interval: 30s
|
||||
port: reloader-web
|
||||
relabelings:
|
||||
- action: replace
|
||||
regex: (.*)
|
||||
replacement: $1
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_name
|
||||
targetLabel: instance
|
||||
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Values.nfc_monitoring.alert_manager.namespace }}"
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 6 }}
|
||||
app.kubernetes.io/instance: {{ $.Release.Name }}
|
||||
app.kubernetes.io/part-of: {{ $.Chart.Name }}
|
||||
|
||||
{{ end }}
|
||||
|
453
values.yaml
453
values.yaml
@ -12,35 +12,27 @@ nfc_monitoring:
|
||||
|
||||
|
||||
alert_manager:
|
||||
|
||||
enabled: true
|
||||
image:
|
||||
name: quay.io/prometheus/alertmanager
|
||||
tag: 'v0.26.0'
|
||||
|
||||
# How many replicas to deploy
|
||||
replicas: 1
|
||||
|
||||
|
||||
ingress:
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: "selfsigned-issuer"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
# enabled: false # Optional, boolean.
|
||||
spec:
|
||||
tls:
|
||||
- hosts:
|
||||
- alert-manager.local
|
||||
secretName: certificate-tls-alert-manager
|
||||
rules:
|
||||
- host: alert-manager.local
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: alertmanager-main
|
||||
port:
|
||||
name: web
|
||||
|
||||
enabled: false
|
||||
|
||||
hostname: alert-manager.local
|
||||
|
||||
|
||||
labels:
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/name: alertmanager
|
||||
|
||||
@ -49,6 +41,11 @@ nfc_monitoring:
|
||||
|
||||
grafana:
|
||||
|
||||
dashboards:
|
||||
cert_manager: false
|
||||
|
||||
enabled: false
|
||||
|
||||
# Grafana Configuration
|
||||
# Type: Dict
|
||||
# See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
|
||||
@ -72,33 +69,20 @@ nfc_monitoring:
|
||||
|
||||
image:
|
||||
name: grafana/grafana
|
||||
tag: '10.1.2' # '10.0.5'
|
||||
tag: '10.3.1' # '10.0.5'
|
||||
|
||||
ingress:
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: "selfsigned-issuer"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
# enabled: false # Optional, boolean.
|
||||
spec:
|
||||
tls:
|
||||
- hosts:
|
||||
- grafana.local
|
||||
secretName: certificate-tls-grafana
|
||||
rules:
|
||||
- host: grafana.local
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: grafana
|
||||
port:
|
||||
name: grafana-http
|
||||
|
||||
enabled: true
|
||||
|
||||
hostname: grafana.local
|
||||
|
||||
|
||||
labels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: grafana
|
||||
|
||||
namespace: grafana
|
||||
@ -195,12 +179,14 @@ nfc_monitoring:
|
||||
|
||||
|
||||
grafana_agent:
|
||||
|
||||
enabled: true
|
||||
|
||||
image:
|
||||
name: grafana/agent
|
||||
tag: 'v0.36.1'
|
||||
tag: 'v0.39.2'
|
||||
|
||||
labels:
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: grafana-agent
|
||||
|
||||
@ -217,9 +203,11 @@ nfc_monitoring:
|
||||
|
||||
namespace: logging
|
||||
|
||||
# If no config is set up, logging will not be enabled.
|
||||
config: {}
|
||||
# service name and port are used for the connection to your loki instance
|
||||
service_name: loki-gateway
|
||||
service_port: 80
|
||||
# service_name: loki-gateway
|
||||
# service_port: 80
|
||||
|
||||
ServiceMonitor:
|
||||
selector:
|
||||
@ -229,10 +217,12 @@ nfc_monitoring:
|
||||
|
||||
|
||||
kube_monitor_proxy:
|
||||
enabled: false
|
||||
namespace: monitoring
|
||||
|
||||
|
||||
kube_rbac_proxy:
|
||||
|
||||
# This image is used as part of kube-monitor-proxy.
|
||||
image:
|
||||
name: quay.io/brancz/kube-rbac-proxy
|
||||
@ -240,6 +230,8 @@ nfc_monitoring:
|
||||
|
||||
|
||||
kube_state_metrics:
|
||||
|
||||
enabled: false
|
||||
image:
|
||||
name: registry.k8s.io/kube-state-metrics/kube-state-metrics
|
||||
tag: 'v2.8.1'
|
||||
@ -250,34 +242,42 @@ nfc_monitoring:
|
||||
|
||||
image:
|
||||
name: prom/prometheus
|
||||
tag: 'v2.47.0'
|
||||
tag: 'v2.49.0'
|
||||
|
||||
# How many replicas to deploy
|
||||
replicas: 1
|
||||
|
||||
# alertmanagers:
|
||||
# - name:
|
||||
|
||||
# Configure prometheus to write metrics to remote host
|
||||
# below example config uses a secret named "prometheus-remote-write" with two keys username and password.
|
||||
# Documentation: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.RemoteWriteSpec
|
||||
remotewrite: {}
|
||||
# url:
|
||||
# name:
|
||||
# remoteTimeout: 30
|
||||
# writeRelabelConfigs:
|
||||
# basicAuth:
|
||||
# username:
|
||||
# name: prometheus-remote-write
|
||||
# key: username
|
||||
# password:
|
||||
# name: prometheus-remote-write
|
||||
# key: password
|
||||
|
||||
|
||||
ingress:
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: "selfsigned-issuer"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
# enabled: false # Optional, boolean.
|
||||
spec:
|
||||
tls:
|
||||
- hosts:
|
||||
- prometheus.local
|
||||
secretName: certificate-tls-prometheus
|
||||
rules:
|
||||
- host: prometheus.local
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: prometheus-k8s
|
||||
port:
|
||||
name: web
|
||||
enabled: true
|
||||
hostname: prometheus.local
|
||||
|
||||
|
||||
# These labels are appended to all Prometheus items and are also the selector labels
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
|
||||
namespace: monitoring
|
||||
@ -307,28 +307,11 @@ nfc_monitoring:
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 10
|
||||
|
||||
# List of namespaces that prometheus is to monitor
|
||||
# used to create Roles and RoleBindings
|
||||
# type: list
|
||||
monitor_namespaces:
|
||||
- alerting
|
||||
- default
|
||||
# - ceph
|
||||
- grafana
|
||||
- monitoring
|
||||
# - kube-dashboard
|
||||
# - kube-metrics
|
||||
- kube-policy
|
||||
- kube-system
|
||||
- logging
|
||||
# - mariadb
|
||||
# - olm
|
||||
# - operators
|
||||
|
||||
# Deploy a generate policy for kyverno to create Role and RoleBindings
|
||||
# for the prometheus service account so it can monitor
|
||||
# new/existing namespaces
|
||||
kyverno_role_policy: true
|
||||
kyverno_role_policy: false
|
||||
|
||||
storage:
|
||||
volumeClaimTemplate:
|
||||
@ -344,26 +327,36 @@ nfc_monitoring:
|
||||
# Type: dict
|
||||
additional:
|
||||
|
||||
# Don't declare remoteWrite here, as it's done at path .prometheus.remotewrite
|
||||
# remoteWrite:
|
||||
# - name: mimir
|
||||
# url: http://mimir-gateway.metrics.svc.cluster.local/api/v1/push
|
||||
|
||||
retention: 24h
|
||||
retentionSize: 20GB
|
||||
retentionSize: 2GB
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
role: alert-rules
|
||||
|
||||
service_monitor:
|
||||
apiserver: false
|
||||
cadvisor: false
|
||||
calico: false
|
||||
ceph: false
|
||||
coredns: false
|
||||
kube_controller_manager: false
|
||||
kubelet: false
|
||||
kube_scheduler: false
|
||||
|
||||
|
||||
prometheus_adaptor:
|
||||
|
||||
enabled: false
|
||||
|
||||
image:
|
||||
name: registry.k8s.io/prometheus-adapter/prometheus-adapter
|
||||
tag: 'v0.11.1'
|
||||
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
|
||||
namespace: monitoring
|
||||
@ -404,22 +397,23 @@ nfc_monitoring:
|
||||
sidecar:
|
||||
|
||||
enabled: true
|
||||
|
||||
config:
|
||||
type: S3
|
||||
config:
|
||||
bucket: "thanos-metrics"
|
||||
endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
|
||||
access_key: "7J5NM2MNCDB4T4Y9OKJ5"
|
||||
secret_key: "t9r69RzZdWEBL3NCKiUIpDk6j5625xc6HucusiGG"
|
||||
insecure: true
|
||||
|
||||
# Config must be specified for the sidecar to deploy
|
||||
config: {}
|
||||
# type: S3
|
||||
# config:
|
||||
# bucket: "thanos-metrics"
|
||||
# endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
|
||||
# access_key: "<s3-access-key>"
|
||||
# secret_key: "<s3-secret-key>"
|
||||
# insecure: true
|
||||
|
||||
|
||||
additions:
|
||||
|
||||
ceph:
|
||||
|
||||
enabled: true
|
||||
enabled: false
|
||||
|
||||
namespace: ceph
|
||||
|
||||
@ -434,7 +428,7 @@ nfc_monitoring:
|
||||
# Add sidecar to grafana pod to load dashboards from configMap
|
||||
dashboard_sidecar:
|
||||
|
||||
enabled: true
|
||||
enabled: false
|
||||
|
||||
image:
|
||||
name: ghcr.io/kiwigrid/k8s-sidecar
|
||||
@ -446,278 +440,7 @@ nfc_monitoring:
|
||||
|
||||
network_policy:
|
||||
|
||||
enabled: true
|
||||
|
||||
# Network Policies to apply. These policies are automatically built using the values below.
|
||||
# What you would find under path root.spec belongs here.
|
||||
#
|
||||
# Do:
|
||||
# - Define 'Ingress'
|
||||
# - Define 'Egress'
|
||||
# - Ensure that the name matches the item name from values.yaml. i.e. nfc_monitoring.{item_name}
|
||||
# for prometheus the item name is 'prometheus'. This value is used to select items pertaining to
|
||||
# that item from values.yaml. for example the labels and namespace.
|
||||
# Don't:
|
||||
# - Define 'podSelector' as this is already included using the selector labels
|
||||
policies:
|
||||
|
||||
### SoF Network Policy: Prometheus ###
|
||||
|
||||
- name: prometheus
|
||||
policy:
|
||||
egress: # ToDo: add further restrictions to egress. is variable lookup possible to obtain values????
|
||||
# - {}
|
||||
- to: # Alert Manager
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: alerting
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/name: alertmanager
|
||||
ports:
|
||||
- port: 9093
|
||||
protocol: TCP
|
||||
|
||||
- to: # Ceph
|
||||
- ipBlock:
|
||||
cidr: 172.16.10.0/24
|
||||
ports:
|
||||
- port: 9283
|
||||
protocol: TCP
|
||||
|
||||
- to: # Grafana
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: grafana
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: graphing
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: grafana
|
||||
ports:
|
||||
- port: 3000
|
||||
protocol: TCP
|
||||
|
||||
- to: # Grafana Agent
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: grafana-agent
|
||||
ports:
|
||||
- port: 12345
|
||||
protocol: TCP
|
||||
|
||||
- to: # Kube DNS
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
|
||||
- to:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
# namespaceSelector:
|
||||
# matchLabels:
|
||||
# kubernetes.io/metadata.name: monitoring
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: metrics
|
||||
ports: []
|
||||
|
||||
- {} # ToDo: Temp rule: Allow All. this rule MUST be removed when egress has been refactored
|
||||
|
||||
ingress:
|
||||
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
# namespaceSelector:
|
||||
# matchLabels:
|
||||
# kubernetes.io/metadata.name: monitoring
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: metrics
|
||||
ports: []
|
||||
# - port: 8080
|
||||
# protocol: TCP
|
||||
# - port: 9090
|
||||
# protocol: TCP
|
||||
# - port: 10901
|
||||
# protocol: TCP
|
||||
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: grafana
|
||||
namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: grafana
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
|
||||
- from: []
|
||||
ports: []
|
||||
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
### SoF Network Policy: Grafana ###
|
||||
|
||||
- name: grafana
|
||||
policy:
|
||||
egress:
|
||||
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: alerting
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/name: alertmanager
|
||||
ports:
|
||||
- port: 9093
|
||||
protocol: TCP
|
||||
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: logging
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: gateway
|
||||
app.kubernetes.io/instance: loki
|
||||
app.kubernetes.io/name: loki
|
||||
ports:
|
||||
- port: 80 # Service Port
|
||||
protocol: TCP
|
||||
- port: 8080 # Pod Port
|
||||
protocol: TCP
|
||||
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: metrics
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: query-layer
|
||||
app.kubernetes.io/instance: thanos-query
|
||||
app.kubernetes.io/name: thanos-query
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
|
||||
- to: [] # Requires internet access for plugins and dashboard downloading
|
||||
ports:
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
|
||||
- to: # Kube DNS
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
|
||||
ingress:
|
||||
|
||||
- from: []
|
||||
ports:
|
||||
- port: 3000
|
||||
protocol: TCP
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
### SoF Network Policy: Grafana Agent ###
|
||||
|
||||
- name: grafana_agent
|
||||
policy:
|
||||
egress:
|
||||
|
||||
- to: # Logging
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: logging
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: gateway
|
||||
app.kubernetes.io/instance: loki
|
||||
app.kubernetes.io/name: loki
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
|
||||
- to: # Kube DNS
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
|
||||
ingress:
|
||||
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 12345
|
||||
protocol: TCP
|
||||
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
|
||||
enabled: false
|
||||
|
||||
|
||||
loki_instance:
|
||||
|
Submodule website-template updated: 992b54805b...8735f623dc
Reference in New Issue
Block a user