772 lines
21 KiB
YAML
772 lines
21 KiB
YAML
---
|
|
|
|
# All values in this Helm chart's values.yaml are nested under the `nfc_monitoring` key.
# This allows the chart to be included as a dependency of another chart without
# variable collisions.
|
|
|
|
nfc_monitoring:
|
|
|
|
# Cluster-level settings. `cluster_dns_name` is interpolated into the Loki
# datasource URL (`.Values.nfc_monitoring.kubernetes.cluster_dns_name`).
kubernetes:
  cluster_dns_name: 'cluster.local'
  networking: 'calico'
|
|
|
|
|
|
# Alertmanager settings: image, ingress, labels and namespace.
alert_manager:

  image:
    name: 'quay.io/prometheus/alertmanager'
    tag: 'v0.26.0'

  ingress:
    annotations:
      cert-manager.io/cluster-issuer: 'selfsigned-issuer'
      nginx.ingress.kubernetes.io/ssl-redirect: 'true'
    # enabled: false  # Optional, boolean.
    spec:
      tls:
        - hosts:
            - 'alert-manager.local'
          secretName: 'certificate-tls-alert-manager'
      rules:
        - host: 'alert-manager.local'
          http:
            paths:
              - path: '/'
                pathType: 'Prefix'
                backend:
                  service:
                    name: 'alertmanager-main'
                    port:
                      name: 'web'

  # The same label set is used by the network policies in this file to
  # select the Alertmanager pods.
  labels:
    app.kubernetes.io/instance: 'main'
    app.kubernetes.io/component: 'alert-router'
    app.kubernetes.io/name: 'alertmanager'

  # Interpolated into the Grafana `alertmanager` datasource URL.
  namespace: 'alerting'
|
|
|
|
|
|
# Grafana settings: grafana.ini configuration, image, ingress, scheduling
# and provisioned data sources.
grafana:

  # Grafana Configuration
  # Type: Dict
  # See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
  config:
    analytics:
      enabled: 'false'
    # database:
    #   type: mysql
    #   host: mariadb-galera.mariadb.svc:3306
    #   name: grafana
    #   user: root
    #   password: admin

    log:
      mode: 'console'
    auth:
      disable_login_form: 'false'
    security:
      # NOTE(review): well-known default credentials; override both values
      # for any deployment that is reachable by other users.
      admin_user: admin
      admin_password: admin

  image:
    name: grafana/grafana
    tag: '10.1.2'  # '10.0.5'

  ingress:
    annotations:
      cert-manager.io/cluster-issuer: 'selfsigned-issuer'
      nginx.ingress.kubernetes.io/ssl-redirect: 'true'
    # enabled: false  # Optional, boolean.
    spec:
      tls:
        - hosts:
            - grafana.local
          secretName: certificate-tls-grafana
      rules:
        - host: grafana.local
          http:
            paths:
              - path: /
                pathType: Prefix
                backend:
                  service:
                    name: grafana
                    port:
                      name: grafana-http

  labels:
    app.kubernetes.io/component: graphing
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: grafana

  namespace: grafana

  replicas: 1

  # storage_accessModes: ReadWriteMany

  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # Prefer worker nodes ...
        - preference:
            matchExpressions:
              - key: node-role.kubernetes.io/worker
                operator: Exists
          weight: 100
        # ... and prefer nodes that are not labelled as storage nodes.
        - preference:
            matchExpressions:
              - key: node-role.kubernetes.io/storage
                operator: DoesNotExist
          weight: 100
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # NOTE(review): the selector targets pods named `prometheus`, not
        # `grafana` - confirm this is intentional and not a copy/paste.
        - podAffinityTerm:
            labelSelector:
              matchExpressions:
                - key: app.kubernetes.io/name
                  operator: In
                  values:
                    - prometheus
            topologyKey: kubernetes.io/hostname
          weight: 10

  # Grafana data sources to provision.
  # Type: list
  # See: https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources
  #
  # `orgId` and `editable` are top-level data source fields; only
  # type-specific settings belong under `jsonData`.
  DataSources:
    - name: alertmanager
      type: alertmanager
      access: proxy
      url: "http://alertmanager-main.{{ .Values.nfc_monitoring.alert_manager.namespace }}.svc:9093"
      isDefault: false
      jsonData:
        tlsSkipVerify: true
        timeInterval: '5s'
        implementation: prometheus
        handleGrafanaManagedAlerts: false
      orgId: 1
      editable: true

    - name: loki
      type: loki
      access: proxy
      url: "http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}"
      isDefault: false
      # Explicit empty mapping instead of a bare key (which parses as null).
      jsonData: {}
      orgId: 1
      editable: true

    # - name: mimir
    #   type: prometheus
    #   access: proxy
    #   url: "http://mimir-gateway.metrics.svc.cluster.local/prometheus"
    #   isDefault: false
    #   jsonData:
    #     manageAlerts: true
    #     prometheusType: Mimir
    #   orgId: 1
    #   editable: true

    # - name: prometheus
    #   type: prometheus
    #   access: proxy
    #   url: "http://prometheus-k8s.{{ .Values.nfc_monitoring.prometheus.namespace }}.svc:9090"
    #   isDefault: true
    #   jsonData:
    #     manageAlerts: true
    #     prometheusType: Prometheus
    #     prometheusVersion: 2.42.0
    #   orgId: 1
    #   editable: true

    - name: thanos
      type: prometheus
      access: proxy
      url: 'http://thanos-query.metrics.svc:9090'
      isDefault: true
      jsonData:
        manageAlerts: true
        prometheusType: Thanos
        # Quoted so the version can never be retyped as a number.
        prometheusVersion: '0.31.0'
      orgId: 1
      editable: true
|
|
|
|
|
|
# Grafana Agent settings: image, labels and namespace.
grafana_agent:

  image:
    name: 'grafana/agent'
    tag: 'v0.36.1'

  # The same label set is used by the network policies in this file to
  # select the agent pods.
  labels:
    app.kubernetes.io/instance: 'k8s'
    app.kubernetes.io/component: 'exporter'
    app.kubernetes.io/name: 'grafana-agent'

  namespace: 'monitoring'
|
|
|
|
|
|
# Loki connection settings. namespace, service_name and service_port are
# interpolated into the Grafana `loki` datasource URL.
loki:

  enabled: true

  image:
    name: 'grafana/loki'
    tag: '2.7.4'

  namespace: 'logging'

  # Service name and port are used for the connection to your Loki instance.
  service_name: 'loki-gateway'
  service_port: 80

  ServiceMonitor:
    selector:
      matchLabels:
        app.kubernetes.io/name: 'loki'
        app.kubernetes.io/component: 'logging'
|
|
|
|
|
|
# Namespace setting for kube-monitor-proxy.
kube_monitor_proxy:
  namespace: 'monitoring'
|
|
|
|
|
|
kube_rbac_proxy:
  # This image is used as part of kube-monitor-proxy.
  image:
    name: 'quay.io/brancz/kube-rbac-proxy'
    tag: 'v0.14.2'
|
|
|
|
|
|
# kube-state-metrics image and namespace.
kube_state_metrics:
  image:
    name: 'registry.k8s.io/kube-state-metrics/kube-state-metrics'
    tag: 'v2.8.1'
  namespace: 'monitoring'
|
|
|
|
|
|
# Prometheus settings: image, ingress, labels, scheduling, monitored
# namespaces, storage and extra PrometheusSpec fields.
prometheus:

  image:
    name: 'prom/prometheus'
    tag: 'v2.47.0'

  ingress:
    annotations:
      cert-manager.io/cluster-issuer: 'selfsigned-issuer'
      nginx.ingress.kubernetes.io/ssl-redirect: 'true'
    # enabled: false  # Optional, boolean.
    spec:
      tls:
        - hosts:
            - 'prometheus.local'
          secretName: 'certificate-tls-prometheus'
      rules:
        - host: 'prometheus.local'
          http:
            paths:
              - path: '/'
                pathType: 'Prefix'
                backend:
                  service:
                    name: 'prometheus-k8s'
                    port:
                      name: 'web'

  # These labels are appended to all Prometheus items and are also the
  # selector labels.
  labels:
    app.kubernetes.io/component: 'prometheus'
    app.kubernetes.io/instance: 'k8s'
    app.kubernetes.io/name: 'prometheus'

  namespace: 'monitoring'

  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # Prefer worker nodes ...
        - preference:
            matchExpressions:
              - key: 'node-role.kubernetes.io/worker'
                operator: 'Exists'
          weight: 100
        # ... and prefer nodes that are not labelled as storage nodes.
        - preference:
            matchExpressions:
              - key: 'node-role.kubernetes.io/storage'
                operator: 'DoesNotExist'
          weight: 100
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # Prefer not to share a host with another pod named `prometheus`.
        - podAffinityTerm:
            labelSelector:
              matchExpressions:
                - key: 'app.kubernetes.io/name'
                  operator: 'In'
                  values:
                    - 'prometheus'
            topologyKey: 'kubernetes.io/hostname'
          weight: 10

  # Namespaces that Prometheus is to monitor; used to create the Roles and
  # RoleBindings.
  # Type: list
  monitor_namespaces:
    - 'alerting'
    - 'default'
    # - ceph
    - 'grafana'
    - 'monitoring'
    # - kube-dashboard
    # - kube-metrics
    - 'kube-policy'
    - 'kube-system'
    - 'logging'
    # - mariadb
    # - olm
    # - operators

  # Deploy a Kyverno generate policy that creates the Role and RoleBinding
  # for the Prometheus service account, so it can monitor new/existing
  # namespaces.
  kyverno_role_policy: true

  storage:
    volumeClaimTemplate:
      spec:
        accessModes:
          - 'ReadWriteOnce'
        resources:
          requests:
            storage: '40Gi'

  # Additional settings for Prometheus.
  # See: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PrometheusSpec
  # Type: dict
  additional:

    # remoteWrite:
    #   - name: mimir
    #     url: http://mimir-gateway.metrics.svc.cluster.local/api/v1/push

    retention: '24h'
    retentionSize: '20GB'
    ruleSelector:
      matchLabels:
        role: 'alert-rules'
|
|
|
|
|
|
# Prometheus Adapter settings: image, labels, namespace and scheduling.
prometheus_adaptor:

  image:
    name: 'registry.k8s.io/prometheus-adapter/prometheus-adapter'
    tag: 'v0.11.1'

  labels:
    app.kubernetes.io/component: 'metrics-adapter'
    app.kubernetes.io/instance: 'main'
    app.kubernetes.io/name: 'prometheus-adapter'

  namespace: 'monitoring'

  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # Prefer worker nodes ...
        - preference:
            matchExpressions:
              - key: 'node-role.kubernetes.io/worker'
                operator: 'Exists'
          weight: 100
        # ... and prefer nodes that are not labelled as storage nodes.
        - preference:
            matchExpressions:
              - key: 'node-role.kubernetes.io/storage'
                operator: 'DoesNotExist'
          weight: 100
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # Prefer not to share a host with pods named `prometheus`.
        - podAffinityTerm:
            labelSelector:
              matchExpressions:
                - key: 'app.kubernetes.io/name'
                  operator: 'In'
                  values:
                    - 'prometheus'
            topologyKey: 'kubernetes.io/hostname'
          weight: 10
|
|
|
|
# Thanos image and the object-store configuration for the Prometheus
# Thanos sidecar.
thanos:
  image:
    name: thanosio/thanos
    tag: 'v0.32.3'

  # Prometheus thanos sidecar
  # See: https://thanos.io/tip/components/sidecar.md/
  # Type: Dict
  sidecar:

    enabled: true

    # Object-store configuration (Thanos `objstore` format).
    config:
      type: S3
      config:
        bucket: 'thanos-metrics'
        endpoint: 'rook-ceph-rgw-earth.ceph.svc:80'
        # SECURITY: do not commit real credentials in chart defaults.
        # Supply these from a private values file or `--set` at install
        # time. A key pair was previously committed in this position and
        # should be treated as leaked and rotated.
        # NOTE(review): confirm how the chart templates handle empty
        # values before relying on these defaults.
        access_key: ''
        secret_key: ''
        # Plain HTTP to the endpoint (no TLS).
        insecure: true
|
|
|
|
|
|
# Optional add-ons: Ceph monitoring and the Grafana dashboard sidecar.
additions:

  ceph:

    enabled: true

    namespace: 'ceph'

    PrometheusRules: true

    ServiceMonitor:

      selector:
        matchLabels:
          app: 'rook-ceph-mgr'

  # Add a sidecar to the Grafana pod to load dashboards from ConfigMaps.
  dashboard_sidecar:

    enabled: true

    image:
      name: 'ghcr.io/kiwigrid/k8s-sidecar'
      tag: '1.24.5'

    label_name: 'grafana_dashboard'
    # Kept quoted: label values must be strings, not integers.
    label_value: '1'
|
|
|
|
|
|
# NetworkPolicy definitions for the Prometheus, Grafana and Grafana Agent
# pods.
network_policy:

  enabled: true

  # Network Policies to apply. These policies are automatically built using the values below.
  # What you would find under path root.spec belongs here.
  #
  # Do:
  #   - Define 'Ingress'
  #   - Define 'Egress'
  #   - Ensure that the name matches the item name from values.yaml, i.e. nfc_monitoring.{item_name}.
  #     For prometheus the item name is 'prometheus'. This value is used to select items pertaining
  #     to that item from values.yaml, for example the labels and namespace.
  # Don't:
  #   - Define 'podSelector', as this is already included using the selector labels.
  policies:

    ### SoF Network Policy: Prometheus ###

    - name: prometheus
      policy:
        egress:  # ToDo: add further restrictions to egress. is variable lookup possible to obtain values????
          # - {}
          - to:  # Alert Manager
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: alerting
                podSelector:
                  matchLabels:
                    app.kubernetes.io/instance: main
                    app.kubernetes.io/component: alert-router
                    app.kubernetes.io/name: alertmanager
            ports:
              - port: 9093
                protocol: TCP

          - to:  # Ceph
              - ipBlock:
                  cidr: 172.16.10.0/24
            ports:
              - port: 9283
                protocol: TCP

          - to:  # Grafana
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: grafana
                podSelector:
                  matchLabels:
                    app.kubernetes.io/component: graphing
                    app.kubernetes.io/instance: k8s
                    app.kubernetes.io/name: grafana
            ports:
              - port: 3000
                protocol: TCP

          - to:  # Grafana Agent
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: monitoring
                podSelector:
                  matchLabels:
                    app.kubernetes.io/instance: k8s
                    app.kubernetes.io/component: exporter
                    app.kubernetes.io/name: grafana-agent
            ports:
              - port: 12345
                protocol: TCP

          - to:  # Kube DNS
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: kube-system
                podSelector:
                  matchLabels:
                    k8s-app: kube-dns
            ports:
              - port: 53
                protocol: TCP
              - port: 53
                protocol: UDP

          - to:
              - podSelector:
                  matchLabels:
                    app.kubernetes.io/name: prometheus
                # namespaceSelector:
                #   matchLabels:
                #     kubernetes.io/metadata.name: monitoring
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: metrics
            ports: []

          # WARNING: an empty rule matches every destination and port, so
          # the rules above currently restrict nothing.
          - {}  # ToDo: Temp rule: Allow All. this rule MUST be removed when egress has been refactored

        ingress:

          - from:
              - podSelector:
                  matchLabels:
                    app.kubernetes.io/name: prometheus
                # namespaceSelector:
                #   matchLabels:
                #     kubernetes.io/metadata.name: monitoring
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: metrics
            ports: []
            # - port: 8080
            #   protocol: TCP
            # - port: 9090
            #   protocol: TCP
            # - port: 10901
            #   protocol: TCP

          - from:
              - podSelector:
                  matchLabels:
                    app.kubernetes.io/name: grafana
                namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: grafana
              - podSelector:
                  matchLabels:
                    app.kubernetes.io/name: prometheus-adapter
                namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: monitoring
            ports:
              - port: 9090
                protocol: TCP

          # NOTE(review): empty `from` and `ports` match all sources on all
          # ports, which makes the ingress rules above redundant - confirm
          # this is intended.
          - from: []
            ports: []

        policyTypes:
          - Egress
          - Ingress

    ### SoF Network Policy: Grafana ###

    - name: grafana
      policy:
        egress:

          - to:  # Alert Manager
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: alerting
                podSelector:
                  matchLabels:
                    app.kubernetes.io/instance: main
                    app.kubernetes.io/component: alert-router
                    app.kubernetes.io/name: alertmanager
            ports:
              - port: 9093
                protocol: TCP

          - to:  # Loki gateway
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: logging
                podSelector:
                  matchLabels:
                    app.kubernetes.io/component: gateway
                    app.kubernetes.io/instance: loki
                    app.kubernetes.io/name: loki
            ports:
              - port: 80  # Service Port
                protocol: TCP
              - port: 8080  # Pod Port
                protocol: TCP

          - to:  # Prometheus and Thanos Query
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: monitoring
                podSelector:
                  matchLabels:
                    app.kubernetes.io/component: prometheus
                    app.kubernetes.io/instance: k8s
                    app.kubernetes.io/name: prometheus
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: metrics
                podSelector:
                  matchLabels:
                    app.kubernetes.io/component: query-layer
                    app.kubernetes.io/instance: thanos-query
                    app.kubernetes.io/name: thanos-query
            ports:
              - port: 9090
                protocol: TCP

          - to: []  # Requires internet access for plugins and dashboard downloading
            ports:
              - port: 443
                protocol: TCP

          - to:  # Kube DNS
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: kube-system
                podSelector:
                  matchLabels:
                    k8s-app: kube-dns
            ports:
              - port: 53
                protocol: TCP
              - port: 53
                protocol: UDP

        ingress:

          # Any source may reach the Grafana HTTP port.
          - from: []
            ports:
              - port: 3000
                protocol: TCP

        policyTypes:
          - Egress
          - Ingress

    ### SoF Network Policy: Grafana Agent ###

    - name: grafana_agent
      policy:
        egress:

          - to:  # Logging
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: logging
                podSelector:
                  matchLabels:
                    app.kubernetes.io/component: gateway
                    app.kubernetes.io/instance: loki
                    app.kubernetes.io/name: loki
            ports:
              - port: 80  # Service Port
                protocol: TCP
              # Added to match the `grafana` policy for the same gateway
              # pods, which allows both the service and the pod port.
              # NOTE(review): confirm the gateway pod listens on 8080.
              - port: 8080  # Pod Port
                protocol: TCP

          - to:  # Kube DNS
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: kube-system
                podSelector:
                  matchLabels:
                    k8s-app: kube-dns
            ports:
              - port: 53
                protocol: TCP
              - port: 53
                protocol: UDP

        ingress:

          - from:  # Prometheus
              - namespaceSelector:
                  matchLabels:
                    kubernetes.io/metadata.name: monitoring
                podSelector:
                  matchLabels:
                    app.kubernetes.io/component: prometheus
                    app.kubernetes.io/instance: k8s
                    app.kubernetes.io/name: prometheus
            ports:
              - port: 12345
                protocol: TCP

        policyTypes:
          - Egress
          - Ingress
|
|
|
|
|
|
|
|
# Image and namespace for the Loki instance.
loki_instance:
  image:
    name: 'grafana/loki'
    tag: '2.7.4'
    # tag: 2.9.0
  namespace: 'loki'
|
|
|
|
|
|
# Image for the Grafana OnCall instance.
oncall_instance:
  image:
    name: 'grafana/oncall'
    tag: 'v1.1.40'
|
|
|
|
|
|
# oncall:
|
|
|
|
# # image:
|
|
# # # Grafana OnCall docker image repository
|
|
# # repository: grafana/oncall
|
|
# # tag: v1.1.38
|
|
# # pullPolicy: Always
|
|
|
|
# service:
|
|
# enabled: false
|
|
# type: LoadBalancer
|
|
# port: 8080
|
|
# annotations: {}
|
|
|
|
# engine:
|
|
# replicaCount: 1
|
|
# resources:
|
|
# limits:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
|
|
# celery:
|
|
# replicaCount: 1
|
|
# resources:
|
|
# limits:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# database:
|
|
# type: none
|