# kubernetes_monitoring/values.yaml
---
# All values within this Helm chart's values.yaml are nested under the `nfc_monitoring` key.
# This makes it possible to include this chart as a dependency in another chart
# without variable collisions.
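#
# A minimal sketch of consuming it as a dependency (the chart version and
# repository URL below are assumptions – substitute your own):
#
#   # Chart.yaml of the parent chart
#   dependencies:
#     - name: kubernetes_monitoring
#       version: "1.0.0"
#       repository: "https://example.com/helm-charts"
#
#   # values.yaml of the parent chart
#   nfc_monitoring:
#     grafana:
#       ingress:
#         hostname: grafana.example.com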
nfc_monitoring:

  kubernetes:
    cluster_dns_name: cluster.local
    networking: calico

  alert_manager:
    enabled: true
    image:
      name: quay.io/prometheus/alertmanager
      tag: 'v0.26.0'
    # How many replicas to deploy
    replicas: 1
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: false
      hostname: alert-manager.local
    labels:
      app.kubernetes.io/component: alert-router
      app.kubernetes.io/name: alertmanager
    namespace: alerting

  grafana:
    dashboards:
      cert_manager: false
      enabled: false
    # Grafana Configuration
    # Type: Dict
    # See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
    config:
      analytics:
        enabled: 'false'
      # database:
      #   type: mysql
      #   host: mariadb-galera.mariadb.svc:3306
      #   name: grafana
      #   user: root
      #   password: admin
      log:
        mode: "console"
      auth:
        disable_login_form: "false"
      security:
        admin_user: admin
        admin_password: admin
    image:
      name: grafana/grafana
      tag: '10.3.1'  # '10.0.5'
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: true
      hostname: grafana.local
    labels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/name: grafana
    namespace: grafana
    replicas: 1
    # storage_accessModes: ReadWriteMany
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # Grafana datasources to add
    # Type: list
    # See: https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources
    DataSources:
      - name: alertmanager
        type: alertmanager
        access: proxy
        url: "http://alertmanager-main.{{ .Values.nfc_monitoring.alert_manager.namespace }}.svc:9093"
        isDefault: false
        jsonData:
          tlsSkipVerify: true
          timeInterval: "5s"
          implementation: prometheus
          handleGrafanaManagedAlerts: false
          orgId: 1
        editable: true
      - name: loki
        type: loki
        access: proxy
        url: "http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}"
        isDefault: false
        jsonData:
          orgId: 1
        editable: true
      # - name: mimir
      #   type: prometheus
      #   access: proxy
      #   url: "http://mimir-gateway.metrics.svc.cluster.local/prometheus"
      #   isDefault: false
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Mimir
      #   editable: true
      # - name: prometheus
      #   type: prometheus
      #   access: proxy
      #   url: "http://prometheus-k8s.{{ .Values.nfc_monitoring.prometheus.namespace }}.svc:9090"
      #   isDefault: true
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Prometheus
      #     prometheusVersion: 2.42.0
      #   editable: true
      - name: thanos
        type: prometheus
        access: proxy
        url: "http://thanos-query.metrics.svc:9090"
        isDefault: true
        jsonData:
          manageAlerts: true
          orgId: 1
          prometheusType: Thanos
          prometheusVersion: 0.31.0
        editable: true

  grafana_agent:
    enabled: true
    image:
      name: grafana/agent
      tag: 'v0.39.2'
    labels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: grafana-agent
    namespace: monitoring

  loki:
    enabled: true
    image:
      name: grafana/loki
      tag: 2.7.4
    namespace: logging
    # If no config is set up, logging will not be enabled.
    config: {}
    # service_name and service_port are used for the connection to your Loki
    # instance (they are referenced by the Grafana loki datasource URL above):
    # service_name: loki-gateway
    # service_port: 80
    ServiceMonitor:
      selector:
        matchLabels:
          app.kubernetes.io/name: loki
          app.kubernetes.io/component: logging

  kube_monitor_proxy:
    enabled: false
    namespace: monitoring

  kube_rbac_proxy:
    # This image is used as part of kube-monitor-proxy.
    image:
      name: quay.io/brancz/kube-rbac-proxy
      tag: 'v0.14.2'

  kube_state_metrics:
    enabled: false
    image:
      name: registry.k8s.io/kube-state-metrics/kube-state-metrics
      tag: 'v2.8.1'
    namespace: monitoring

  prometheus:
    image:
      name: prom/prometheus
      tag: 'v2.49.0'
    # How many replicas to deploy
    replicas: 1
    # alertmanagers:
    #   - name:
    # Configure Prometheus to write metrics to a remote host.
    # The example config below uses a secret named "prometheus-remote-write"
    # with two keys: username and password.
    # Documentation: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.RemoteWriteSpec
    remotewrite: {}
    #   url:
    #   name:
    #   remoteTimeout: 30
    #   writeRelabelConfigs:
    #   basicAuth:
    #     username:
    #       name: prometheus-remote-write
    #       key: username
    #     password:
    #       name: prometheus-remote-write
    #       key: password
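    # A sketch of that secret (assumption – the namespace should match
    # .prometheus.namespace, which defaults to "monitoring"):
    #   apiVersion: v1
    #   kind: Secret
    #   metadata:
    #     name: prometheus-remote-write
    #     namespace: monitoring
    #   stringData:
    #     username: <remote-write-username>
    #     password: <remote-write-password>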
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: true
      hostname: prometheus.local
    # These labels are appended to all Prometheus items and are also the selector labels
    labels:
      app.kubernetes.io/component: prometheus
      app.kubernetes.io/name: prometheus
    namespace: monitoring
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # Deploy a Kyverno generate policy that creates a Role and RoleBinding
    # for the Prometheus service account so that it can monitor
    # new and existing namespaces.
    kyverno_role_policy: false
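    # A rough sketch of the shape such a generate policy takes (assumption –
    # the names and rules here are illustrative, not the chart's actual policy):
    #   apiVersion: kyverno.io/v1
    #   kind: ClusterPolicy
    #   metadata:
    #     name: prometheus-namespace-rbac
    #   spec:
    #     rules:
    #       - name: generate-prometheus-role
    #         match:
    #           any:
    #             - resources:
    #                 kinds:
    #                   - Namespace
    #         generate:
    #           apiVersion: rbac.authorization.k8s.io/v1
    #           kind: Role
    #           name: prometheus-k8s
    #           namespace: "{{request.object.metadata.name}}"
    #           synchronize: true
    #           data:
    #             rules:
    #               - apiGroups: [""]
    #                 resources: ["services", "endpoints", "pods"]
    #                 verbs: ["get", "list", "watch"]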
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 40Gi
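          # Any other field from the standard PersistentVolumeClaim spec may be
          # added here, e.g. (the class name is an assumption – use one from
          # your cluster):
          # storageClassName: ceph-block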
    # Additional settings for Prometheus.
    # See: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PrometheusSpec
    # Type: dict
    additional:
      # Don't declare remoteWrite here, as that's done at path .prometheus.remotewrite
      # remoteWrite:
      retention: 24h
      retentionSize: 2GB
      ruleSelector:
        matchLabels:
          role: alert-rules
    service_monitor:
      apiserver: false
      cadvisor: false
      calico: false
      ceph: false
      coredns: false
      kube_controller_manager: false
      kubelet: false
      kube_scheduler: false

  prometheus_adaptor:
    enabled: false
    image:
      name: registry.k8s.io/prometheus-adapter/prometheus-adapter
      tag: 'v0.11.1'
    labels:
      app.kubernetes.io/component: metrics-adapter
      app.kubernetes.io/name: prometheus-adapter
    namespace: monitoring
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10

  thanos:
    image:
      name: thanosio/thanos
      tag: v0.32.3
    # Prometheus thanos sidecar
    # See: https://thanos.io/tip/components/sidecar.md/
    # Type: Dict
    sidecar:
      enabled: true
      # Config must be specified for the sidecar to deploy
      config: {}
      #   type: S3
      #   config:
      #     bucket: "thanos-metrics"
      #     endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
      #     access_key: "7J5NM2MNCDB4T4Y9OKJ5"
      #     secret_key: "t9r69RzZdWEBL3NCKiUIpDk6j5625xc6HucusiGG"
      #     insecure: true
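      # The commented example above follows Thanos' object storage client
      # configuration format; see: https://thanos.io/tip/thanos/storage.md/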

  additions:

    ceph:
      enabled: false
      namespace: ceph
      PrometheusRules: true
      ServiceMonitor:
        selector:
          matchLabels:
            app: rook-ceph-mgr

    # Add sidecar to grafana pod to load dashboards from configMap
    dashboard_sidecar:
      enabled: false
      image:
        name: ghcr.io/kiwigrid/k8s-sidecar
        tag: '1.24.5'
      label_name: grafana_dashboard
      label_value: "1"
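      # A sketch of a dashboard ConfigMap the sidecar would load (assumption –
      # name and namespace are illustrative; the label must match
      # label_name/label_value above):
      #   apiVersion: v1
      #   kind: ConfigMap
      #   metadata:
      #     name: my-dashboard
      #     namespace: grafana
      #     labels:
      #       grafana_dashboard: "1"
      #   data:
      #     my-dashboard.json: |
      #       { ...dashboard JSON... }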
    network_policy:
      enabled: false

    loki_instance:
      image:
        name: grafana/loki
        tag: 2.7.4
        # tag: 2.9.0
      namespace: loki

    oncall_instance:
      image:
        name: grafana/oncall
        tag: v1.1.40
      # oncall:
      #   # image:
      #   #   # Grafana OnCall docker image repository
      #   #   repository: grafana/oncall
      #   #   tag: v1.1.38
      #   #   pullPolicy: Always
      #   service:
      #     enabled: false
      #     type: LoadBalancer
      #     port: 8080
      #     annotations: {}
      #   engine:
      #     replicaCount: 1
      #     resources:
      #       limits:
      #         cpu: 100m
      #         memory: 128Mi
      #       requests:
      #         cpu: 100m
      #         memory: 128Mi
      #   celery:
      #     replicaCount: 1
      #     resources:
      #       limits:
      #         cpu: 100m
      #         memory: 128Mi
      #       requests:
      #         cpu: 100m
      #         memory: 128Mi
      #   database:
      #     type: none