---
# All values within this helm chart's values.yaml file are namespaced under
# `nfc_monitoring`. This provides the opportunity to include this helm chart
# as a dependency without variable collision.
nfc_monitoring:

  kubernetes:
    cluster_dns_name: cluster.local
    networking: calico

  alert_manager:
    enabled: true
    image:
      name: quay.io/prometheus/alertmanager
      tag: 'v0.26.0'
    # How many replicas to deploy
    replicas: 1
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: false
      hostname: alert-manager.local
    labels:
      app.kubernetes.io/component: alert-router
      app.kubernetes.io/name: alertmanager
    namespace: alerting

  grafana:
    dashboards:
      cert_manager: false
      enabled: false
    # Grafana configuration
    # Type: Dict
    # See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
    config:
      analytics:
        enabled: 'false'
      # database:
      #   type: mysql
      #   host: mariadb-galera.mariadb.svc:3306
      #   name: grafana
      #   user: root
      #   password: admin
      log:
        mode: "console"
      auth:
        disable_login_form: "false"
      security:
        admin_user: admin
        admin_password: admin
    image:
      name: grafana/grafana
      tag: '10.3.1'  # '10.0.5'
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: true
      hostname: grafana.local
    labels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/name: grafana
    namespace: grafana
    replicas: 1
    # storage_accessModes: ReadWriteMany
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # To add Grafana datasources
    # Type: list
    # See: https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources
    DataSources:
      - name: alertmanager
        type: alertmanager
        access: proxy
        url: "http://alertmanager-main.{{ .Values.nfc_monitoring.alert_manager.namespace }}.svc:9093"
        isDefault: false
        jsonData:
          tlsSkipVerify: true
          timeInterval: "5s"
          implementation: prometheus
          handleGrafanaManagedAlerts: false
        orgId: 1
        editable: true
      - name: loki
        type: loki
        access: proxy
        url: "http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}"
        isDefault: false
        jsonData: {}
        orgId: 1
        editable: true
      # - name: mimir
      #   type: prometheus
      #   access: proxy
      #   url: "http://mimir-gateway.metrics.svc.cluster.local/prometheus"
      #   isDefault: false
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Mimir
      #   editable: true
      # - name: prometheus
      #   type: prometheus
      #   access: proxy
      #   url: "http://prometheus-k8s.{{ .Values.nfc_monitoring.prometheus.namespace }}.svc:9090"
      #   isDefault: true
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Prometheus
      #     prometheusVersion: 2.42.0
      #   editable: true
      - name: thanos
        type: prometheus
        access: proxy
        url: "http://thanos-query.metrics.svc:9090"
        isDefault: true
        jsonData:
          manageAlerts: true
          orgId: 1
          prometheusType: Thanos
          prometheusVersion: 0.31.0
        editable: true
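    # Worked example (informational only, not consumed by the chart): the
    # Helm-templated datasource URLs above resolve from other values in this
    # file. For instance, assuming the commented Loki connection defaults
    # further below (service_name: loki-gateway, service_port: 80), together
    # with loki.namespace: logging and kubernetes.cluster_dns_name:
    # cluster.local, the loki datasource URL renders to:
    #
    #   http://loki-gateway.logging.svc.cluster.local:80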
  grafana_agent:
    enabled: true
    image:
      name: grafana/agent
      tag: 'v0.39.2'
    labels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: grafana-agent
    namespace: monitoring

  loki:
    enabled: true
    image:
      name: grafana/loki
      tag: 2.7.4
    namespace: logging
    # If no config is set up, logging will not be enabled.
    config: {}
    # Service name and port are used for the connection to your Loki instance
    # service_name: loki-gateway
    # service_port: 80
    ServiceMonitor:
      selector:
        matchLabels:
          app.kubernetes.io/name: loki
          app.kubernetes.io/component: logging

  kube_monitor_proxy:
    enabled: false
    namespace: monitoring

  kube_rbac_proxy:
    # This image is used as part of kube-monitor-proxy.
    image:
      name: quay.io/brancz/kube-rbac-proxy
      tag: 'v0.14.2'

  kube_state_metrics:
    enabled: false
    image:
      name: registry.k8s.io/kube-state-metrics/kube-state-metrics
      tag: 'v2.8.1'
    namespace: monitoring

  prometheus:
    image:
      name: prom/prometheus
      tag: 'v2.49.0'
    # How many replicas to deploy
    replicas: 1
    # alertmanagers:
    #   - name:
    # Configure Prometheus to write metrics to a remote host.
    # The example config below uses a secret named "prometheus-remote-write"
    # with two keys: username and password.
    # Documentation: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.RemoteWriteSpec
    remotewrite: {}
    #   url:
    #   name:
    #   remoteTimeout: 30
    #   writeRelabelConfigs:
    #   basicAuth:
    #     username:
    #       name: prometheus-remote-write
    #       key: username
    #     password:
    #       name: prometheus-remote-write
    #       key: password
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: true
      hostname: prometheus.local
    # These labels are appended to all Prometheus items and are also the selector labels
    labels:
      app.kubernetes.io/component: prometheus
      app.kubernetes.io/name: prometheus
    namespace: monitoring
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # Deploy a Kyverno generate policy to create the Role and RoleBindings
    # for the Prometheus service account so it can monitor
    # new/existing namespaces
    kyverno_role_policy: false
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 40Gi
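    # Example (assumption, not a chart default): the volumeClaimTemplate above
    # accepts standard PersistentVolumeClaim spec fields, so the PVC can be
    # pinned to a specific storage class, e.g.:
    #
    #   storage:
    #     volumeClaimTemplate:
    #       spec:
    #         storageClassName: ceph-block  # hypothetical class name
    #         accessModes:
    #           - ReadWriteOnce
    #         resources:
    #           requests:
    #             storage: 40Gi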
    # Additional settings for Prometheus.
    # See: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PrometheusSpec
    # Type: dict
    additional:
      # Don't declare remoteWrite here, as that's done at path .prometheus.remotewrite
      # remoteWrite:
      retention: 24h
      retentionSize: 2GB
      ruleSelector:
        matchLabels:
          role: alert-rules
    service_monitor:
      apiserver: false
      cadvisor: false
      calico: false
      ceph: false
      coredns: false
      kube_controller_manager: false
      kubelet: false
      kube_scheduler: false

  prometheus_adaptor:
    enabled: false
    image:
      name: registry.k8s.io/prometheus-adapter/prometheus-adapter
      tag: 'v0.11.1'
    labels:
      app.kubernetes.io/component: metrics-adapter
      app.kubernetes.io/name: prometheus-adapter
    namespace: monitoring
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10

  thanos:
    image:
      name: thanosio/thanos
      tag: v0.32.3
    # Prometheus Thanos sidecar
    # See: https://thanos.io/tip/components/sidecar.md/
    # Type: Dict
    sidecar:
      enabled: true
      # Config must be specified for the sidecar to deploy
      config: {}
      # type: S3
      # config:
      #   bucket: "thanos-metrics"
      #   endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
      #   access_key: "7J5NM2MNCDB4T4Y9OKJ5"
      #   secret_key: "t9r69RzZdWEBL3NCKiUIpDk6j5625xc6HucusiGG"
      #   insecure: true

  additions:

    ceph:
      enabled: false
      namespace: ceph
      PrometheusRules: true
      ServiceMonitor:
        selector:
          matchLabels:
            app: rook-ceph-mgr

    # Add a sidecar to the Grafana pod to load dashboards from a ConfigMap
    dashboard_sidecar:
      enabled: false
      image:
        name: ghcr.io/kiwigrid/k8s-sidecar
        tag: '1.24.5'
      label_name: grafana_dashboard
      label_value: "1"

    network_policy:
      enabled: false

  loki_instance:
    image:
      name: grafana/loki
      tag: 2.7.4
      # tag: 2.9.0
    namespace: loki

  oncall_instance:
    image:
      name: grafana/oncall
      tag: v1.1.40

  # oncall:
  #   # image:
  #   #   # Grafana OnCall docker image repository
  #   #   repository: grafana/oncall
  #   #   tag: v1.1.38
  #   #   pullPolicy: Always
  #   service:
  #     enabled: false
  #     type: LoadBalancer
  #     port: 8080
  #     annotations: {}
  #   engine:
  #     replicaCount: 1
  #     resources:
  #       limits:
  #         cpu: 100m
  #         memory: 128Mi
  #       requests:
  #         cpu: 100m
  #         memory: 128Mi
  #   celery:
  #     replicaCount: 1
  #     resources:
  #       limits:
  #         cpu: 100m
  #         memory: 128Mi
  #       requests:
  #         cpu: 100m
  #         memory: 128Mi
  #   database:
  #     type: none
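# Example (assumption, not consumed by the chart): because every value above is
# namespaced under `nfc_monitoring`, a parent chart can pull this chart in as a
# dependency and override values without collision. In the parent Chart.yaml
# (chart name, version, and repository below are illustrative):
#
#   dependencies:
#     - name: nfc_monitoring
#       version: 1.0.0
#       repository: https://example.com/helm-charts
#
# and in the parent values.yaml:
#
#   nfc_monitoring:
#     grafana:
#       ingress:
#         hostname: grafana.example.org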